summaryrefslogtreecommitdiffstats
path: root/share/doc
diff options
context:
space:
mode:
authorrgrimes <rgrimes@FreeBSD.org>1994-05-30 19:09:18 +0000
committerrgrimes <rgrimes@FreeBSD.org>1994-05-30 19:09:18 +0000
commitb0d61785cae024b1f44119446a940ee14c9ac959 (patch)
tree5a495a583b002ae9e57f09848ae697160708c220 /share/doc
parentd43599f73ba5858e573c7ad8b284f6a0808c5c93 (diff)
downloadFreeBSD-src-b0d61785cae024b1f44119446a940ee14c9ac959.zip
FreeBSD-src-b0d61785cae024b1f44119446a940ee14c9ac959.tar.gz
BSD 4.4 Lite Share Sources
Diffstat (limited to 'share/doc')
-rw-r--r--share/doc/Makefile7
-rw-r--r--share/doc/iso/README3
-rw-r--r--share/doc/iso/ucb/addr.nr155
-rw-r--r--share/doc/iso/ucb/def.nr144
-rw-r--r--share/doc/iso/ucb/intro.nr72
-rw-r--r--share/doc/iso/ucb/ipc.nr331
-rw-r--r--share/doc/iso/ucb/macros.nr50
-rw-r--r--share/doc/iso/ucb/net_serv.nr163
-rw-r--r--share/doc/iso/ucb/program.nr42
-rw-r--r--share/doc/iso/ucb/trans_serv.nr697
-rw-r--r--share/doc/iso/wisc/Makefile73
-rw-r--r--share/doc/iso/wisc/Outline18
-rw-r--r--share/doc/iso/wisc/TODO2
-rw-r--r--share/doc/iso/wisc/addr.nr155
-rw-r--r--share/doc/iso/wisc/appendix_a.nr51
-rw-r--r--share/doc/iso/wisc/appendix_b.nr10
-rw-r--r--share/doc/iso/wisc/debug.nr1043
-rw-r--r--share/doc/iso/wisc/def.nr144
-rwxr-xr-xshare/doc/iso/wisc/dogrn6
-rw-r--r--share/doc/iso/wisc/eicon.nr729
-rw-r--r--share/doc/iso/wisc/eicon.table5.1.orig.nr126
-rw-r--r--share/doc/iso/wisc/errors.nr363
-rw-r--r--share/doc/iso/wisc/esis_design.nr114
-rw-r--r--share/doc/iso/wisc/figs/CONS_primitives.nr77
-rw-r--r--share/doc/iso/wisc/figs/Makefile18
-rw-r--r--share/doc/iso/wisc/figs/NS_primitives.nr69
-rw-r--r--share/doc/iso/wisc/figs/TS_primitives.nr60
-rw-r--r--share/doc/iso/wisc/figs/addrfmt.nr22
-rw-r--r--share/doc/iso/wisc/figs/clnp_input.grn18
-rw-r--r--share/doc/iso/wisc/figs/clnp_input.gsrc338
-rw-r--r--share/doc/iso/wisc/figs/clnp_input.nr188
-rw-r--r--share/doc/iso/wisc/figs/clnp_output.grn18
-rw-r--r--share/doc/iso/wisc/figs/clnp_output.gsrc376
-rw-r--r--share/doc/iso/wisc/figs/clnp_output.nr233
-rw-r--r--share/doc/iso/wisc/figs/ecn_network.grn19
-rw-r--r--share/doc/iso/wisc/figs/ecn_network.gsrc288
-rw-r--r--share/doc/iso/wisc/figs/ecn_network.nr0
-rw-r--r--share/doc/iso/wisc/figs/ecn_queue.grn19
-rw-r--r--share/doc/iso/wisc/figs/ecn_queue.gsrc371
-rw-r--r--share/doc/iso/wisc/figs/ecn_queue.nr262
-rw-r--r--share/doc/iso/wisc/figs/ecn_vc.grn19
-rw-r--r--share/doc/iso/wisc/figs/ecn_vc.gsrc273
-rw-r--r--share/doc/iso/wisc/figs/ecn_vc.nr205
-rw-r--r--share/doc/iso/wisc/figs/func_units.grn18
-rw-r--r--share/doc/iso/wisc/figs/func_units.gsrc603
-rw-r--r--share/doc/iso/wisc/figs/func_units.nr0
-rw-r--r--share/doc/iso/wisc/figs/link_to_CONS_primitives.NR.DONT_REMOVE77
-rw-r--r--share/doc/iso/wisc/figs/link_to_TS_primitives.NR.DONT_REMOVE60
-rw-r--r--share/doc/iso/wisc/figs/mbufrcv.grn13
-rw-r--r--share/doc/iso/wisc/figs/mbufrcv.gsrc1006
-rw-r--r--share/doc/iso/wisc/figs/mbufrcv.nr504
-rw-r--r--share/doc/iso/wisc/figs/mbufsnd.grn13
-rw-r--r--share/doc/iso/wisc/figs/mbufsnd.gsrc534
-rw-r--r--share/doc/iso/wisc/figs/mbufsnd.nr284
-rw-r--r--share/doc/iso/wisc/figs/osi_addr.grn18
-rw-r--r--share/doc/iso/wisc/figs/osi_addr.gsrc62
-rw-r--r--share/doc/iso/wisc/figs/osi_addr.nr59
-rw-r--r--share/doc/iso/wisc/figs/tppt.grn18
-rw-r--r--share/doc/iso/wisc/figs/tppt.gsrc411
-rw-r--r--share/doc/iso/wisc/figs/tppt.gsrc.save335
-rw-r--r--share/doc/iso/wisc/figs/tppt.nr296
-rw-r--r--share/doc/iso/wisc/figs/trans_flow.grn20
-rw-r--r--share/doc/iso/wisc/figs/trans_flow.gsrc567
-rw-r--r--share/doc/iso/wisc/figs/trans_flow.nr274
-rw-r--r--share/doc/iso/wisc/figs/unix_ipc.grn18
-rw-r--r--share/doc/iso/wisc/figs/unix_ipc.gsrc1041
-rw-r--r--share/doc/iso/wisc/figs/unix_ipc.nr499
-rw-r--r--share/doc/iso/wisc/intro.nr76
-rw-r--r--share/doc/iso/wisc/ipc.nr372
-rw-r--r--share/doc/iso/wisc/macros.nr50
-rw-r--r--share/doc/iso/wisc/net_design.nr1139
-rw-r--r--share/doc/iso/wisc/net_serv.nr163
-rw-r--r--share/doc/iso/wisc/parts.nr39
-rwxr-xr-xshare/doc/iso/wisc/preview7
-rw-r--r--share/doc/iso/wisc/program.nr51
-rw-r--r--share/doc/iso/wisc/trans_design.nr1466
-rw-r--r--share/doc/iso/wisc/trans_serv.nr692
-rw-r--r--share/doc/iso/wiscman/arp.4p118
-rw-r--r--share/doc/iso/wiscman/clnp.4p91
-rw-r--r--share/doc/iso/wiscman/cons.4241
-rw-r--r--share/doc/iso/wiscman/cons.4p196
-rw-r--r--share/doc/iso/wiscman/if.4n136
-rw-r--r--share/doc/iso/wiscman/iso.4f87
-rw-r--r--share/doc/iso/wiscman/rvd.4p90
-rw-r--r--share/doc/iso/wiscman/tp.4p609
-rw-r--r--share/doc/papers/beyond4.3/Makefile7
-rw-r--r--share/doc/papers/beyond4.3/beyond43.ms518
-rw-r--r--share/doc/papers/diskperf/Makefile11
-rw-r--r--share/doc/papers/diskperf/abs.ms176
-rw-r--r--share/doc/papers/diskperf/appendix.ms98
-rw-r--r--share/doc/papers/diskperf/conclusions.ms127
-rw-r--r--share/doc/papers/diskperf/equip.ms177
-rw-r--r--share/doc/papers/diskperf/methodology.ms111
-rw-r--r--share/doc/papers/diskperf/motivation.ms93
-rw-r--r--share/doc/papers/diskperf/results.ms337
-rw-r--r--share/doc/papers/diskperf/tests.ms108
-rw-r--r--share/doc/papers/fsinterface/Makefile7
-rw-r--r--share/doc/papers/fsinterface/abstract.ms73
-rw-r--r--share/doc/papers/fsinterface/fsinterface.ms1176
-rw-r--r--share/doc/papers/fsinterface/slides.t318
-rw-r--r--share/doc/papers/kernmalloc/Makefile11
-rw-r--r--share/doc/papers/kernmalloc/alloc.fig115
-rw-r--r--share/doc/papers/kernmalloc/appendix.t137
-rw-r--r--share/doc/papers/kernmalloc/kernmalloc.t649
-rw-r--r--share/doc/papers/kernmalloc/spell.ok57
-rw-r--r--share/doc/papers/kernmalloc/usage.tbl75
-rw-r--r--share/doc/papers/kerntune/0.t129
-rw-r--r--share/doc/papers/kerntune/1.t48
-rw-r--r--share/doc/papers/kerntune/2.t234
-rw-r--r--share/doc/papers/kerntune/3.t290
-rw-r--r--share/doc/papers/kerntune/4.t99
-rw-r--r--share/doc/papers/kerntune/Makefile10
-rw-r--r--share/doc/papers/kerntune/fig2.pic57
-rw-r--r--share/doc/papers/memfs/0.t86
-rw-r--r--share/doc/papers/memfs/1.t392
-rw-r--r--share/doc/papers/memfs/A.t173
-rw-r--r--share/doc/papers/memfs/Makefile22
-rw-r--r--share/doc/papers/memfs/ref.bib49
-rw-r--r--share/doc/papers/memfs/spell.ok18
-rw-r--r--share/doc/papers/memfs/tmac.srefs177
-rw-r--r--share/doc/papers/newvm/0.t86
-rw-r--r--share/doc/papers/newvm/1.t377
-rw-r--r--share/doc/papers/newvm/Makefile10
-rw-r--r--share/doc/papers/newvm/a.t239
-rw-r--r--share/doc/papers/newvm/spell.ok56
-rw-r--r--share/doc/papers/nqnfs/Makefile10
-rw-r--r--share/doc/papers/nqnfs/nqnfs.me2007
-rw-r--r--share/doc/papers/px/Makefile15
-rw-r--r--share/doc/papers/px/fig1.1.n71
-rw-r--r--share/doc/papers/px/fig1.2.n68
-rw-r--r--share/doc/papers/px/fig1.3.n60
-rw-r--r--share/doc/papers/px/fig2.3.raw103
-rw-r--r--share/doc/papers/px/fig2.4.n57
-rw-r--r--share/doc/papers/px/fig3.2.n56
-rw-r--r--share/doc/papers/px/fig3.3.n57
-rw-r--r--share/doc/papers/px/pxin0.n140
-rw-r--r--share/doc/papers/px/pxin1.n538
-rw-r--r--share/doc/papers/px/pxin2.n923
-rw-r--r--share/doc/papers/px/pxin3.n597
-rw-r--r--share/doc/papers/px/pxin4.n67
-rw-r--r--share/doc/papers/px/table2.1.n83
-rw-r--r--share/doc/papers/px/table2.2.n85
-rw-r--r--share/doc/papers/px/table2.3.n45
-rw-r--r--share/doc/papers/px/table3.1.n47
-rw-r--r--share/doc/papers/px/tmac.p113
-rw-r--r--share/doc/papers/relengr/0.t91
-rw-r--r--share/doc/papers/relengr/1.t69
-rw-r--r--share/doc/papers/relengr/2.t146
-rw-r--r--share/doc/papers/relengr/3.t390
-rw-r--r--share/doc/papers/relengr/Makefile12
-rw-r--r--share/doc/papers/relengr/ref.bib26
-rw-r--r--share/doc/papers/relengr/ref.bib.ig3
-rw-r--r--share/doc/papers/relengr/spell.ok15
-rw-r--r--share/doc/papers/relengr/tmac.srefs179
-rw-r--r--share/doc/papers/sysperf/0.t247
-rw-r--r--share/doc/papers/sysperf/1.t81
-rw-r--r--share/doc/papers/sysperf/2.t258
-rw-r--r--share/doc/papers/sysperf/3.t694
-rw-r--r--share/doc/papers/sysperf/4.t774
-rw-r--r--share/doc/papers/sysperf/5.t285
-rw-r--r--share/doc/papers/sysperf/6.t70
-rw-r--r--share/doc/papers/sysperf/7.t164
-rw-r--r--share/doc/papers/sysperf/Makefile22
-rw-r--r--share/doc/papers/sysperf/a1.t668
-rw-r--r--share/doc/papers/sysperf/a2.t117
-rw-r--r--share/doc/psd/00.contents191
-rw-r--r--share/doc/psd/05.sysman/0.t292
-rw-r--r--share/doc/psd/05.sysman/1.0.t56
-rw-r--r--share/doc/psd/05.sysman/1.1.t215
-rw-r--r--share/doc/psd/05.sysman/1.2.t272
-rw-r--r--share/doc/psd/05.sysman/1.3.t254
-rw-r--r--share/doc/psd/05.sysman/1.4.t137
-rw-r--r--share/doc/psd/05.sysman/1.5.t225
-rw-r--r--share/doc/psd/05.sysman/1.6.t135
-rw-r--r--share/doc/psd/05.sysman/1.7.t100
-rw-r--r--share/doc/psd/05.sysman/2.0.t83
-rw-r--r--share/doc/psd/05.sysman/2.1.t138
-rw-r--r--share/doc/psd/05.sysman/2.2.t470
-rw-r--r--share/doc/psd/05.sysman/2.3.t412
-rw-r--r--share/doc/psd/05.sysman/2.4.t174
-rw-r--r--share/doc/psd/05.sysman/2.5.t39
-rw-r--r--share/doc/psd/05.sysman/Makefile11
-rw-r--r--share/doc/psd/05.sysman/a.t235
-rw-r--r--share/doc/psd/05.sysman/spell.ok332
-rw-r--r--share/doc/psd/20.ipctut/Makefile13
-rw-r--r--share/doc/psd/20.ipctut/dgramread.c83
-rw-r--r--share/doc/psd/20.ipctut/dgramsend.c80
-rw-r--r--share/doc/psd/20.ipctut/fig2.pic77
-rw-r--r--share/doc/psd/20.ipctut/fig2.xfig100
-rw-r--r--share/doc/psd/20.ipctut/fig3.pic69
-rw-r--r--share/doc/psd/20.ipctut/fig3.xfig100
-rw-r--r--share/doc/psd/20.ipctut/fig8.pic79
-rw-r--r--share/doc/psd/20.ipctut/fig8.xfig116
-rw-r--r--share/doc/psd/20.ipctut/pipe.c74
-rw-r--r--share/doc/psd/20.ipctut/socketpair.c77
-rw-r--r--share/doc/psd/20.ipctut/strchkread.c106
-rw-r--r--share/doc/psd/20.ipctut/streamread.c102
-rw-r--r--share/doc/psd/20.ipctut/streamwrite.c81
-rw-r--r--share/doc/psd/20.ipctut/tutor.me939
-rw-r--r--share/doc/psd/20.ipctut/udgramread.c80
-rw-r--r--share/doc/psd/20.ipctut/udgramsend.c68
-rw-r--r--share/doc/psd/20.ipctut/ustreamread.c96
-rw-r--r--share/doc/psd/20.ipctut/ustreamwrite.c71
-rw-r--r--share/doc/psd/21.ipc/0.t93
-rw-r--r--share/doc/psd/21.ipc/1.t106
-rw-r--r--share/doc/psd/21.ipc/2.t714
-rw-r--r--share/doc/psd/21.ipc/3.t409
-rw-r--r--share/doc/psd/21.ipc/4.t514
-rw-r--r--share/doc/psd/21.ipc/5.t1667
-rw-r--r--share/doc/psd/21.ipc/Makefile10
-rw-r--r--share/doc/psd/21.ipc/spell.ok347
-rw-r--r--share/doc/psd/Makefile22
-rw-r--r--share/doc/psd/Title131
-rw-r--r--share/doc/smm/00.contents161
-rw-r--r--share/doc/smm/01.setup/0.t131
-rw-r--r--share/doc/smm/01.setup/1.t172
-rw-r--r--share/doc/smm/01.setup/2.t1658
-rw-r--r--share/doc/smm/01.setup/3.t2001
-rw-r--r--share/doc/smm/01.setup/4.t713
-rw-r--r--share/doc/smm/01.setup/5.t586
-rw-r--r--share/doc/smm/01.setup/6.t663
-rw-r--r--share/doc/smm/01.setup/Makefile15
-rw-r--r--share/doc/smm/01.setup/spell.ok618
-rw-r--r--share/doc/smm/04.quotas/Makefile7
-rw-r--r--share/doc/smm/04.quotas/quotas.ms318
-rw-r--r--share/doc/smm/05.fastfs/0.t159
-rw-r--r--share/doc/smm/05.fastfs/1.t112
-rw-r--r--share/doc/smm/05.fastfs/2.t143
-rw-r--r--share/doc/smm/05.fastfs/3.t594
-rw-r--r--share/doc/smm/05.fastfs/4.t252
-rw-r--r--share/doc/smm/05.fastfs/5.t293
-rw-r--r--share/doc/smm/05.fastfs/6.t159
-rw-r--r--share/doc/smm/05.fastfs/Makefile10
-rw-r--r--share/doc/smm/06.nfs/0.t75
-rw-r--r--share/doc/smm/06.nfs/1.t588
-rw-r--r--share/doc/smm/06.nfs/2.t530
-rw-r--r--share/doc/smm/06.nfs/Makefile7
-rw-r--r--share/doc/smm/06.nfs/ref.t123
-rw-r--r--share/doc/smm/18.net/0.t184
-rw-r--r--share/doc/smm/18.net/1.t66
-rw-r--r--share/doc/smm/18.net/2.t85
-rw-r--r--share/doc/smm/18.net/3.t59
-rw-r--r--share/doc/smm/18.net/4.t67
-rw-r--r--share/doc/smm/18.net/5.t184
-rw-r--r--share/doc/smm/18.net/6.t664
-rw-r--r--share/doc/smm/18.net/7.t256
-rw-r--r--share/doc/smm/18.net/8.t166
-rw-r--r--share/doc/smm/18.net/9.t124
-rw-r--r--share/doc/smm/18.net/Makefile10
-rw-r--r--share/doc/smm/18.net/a.t219
-rw-r--r--share/doc/smm/18.net/b.t145
-rw-r--r--share/doc/smm/18.net/c.t151
-rw-r--r--share/doc/smm/18.net/d.t73
-rw-r--r--share/doc/smm/18.net/e.t129
-rw-r--r--share/doc/smm/18.net/f.t117
-rw-r--r--share/doc/smm/18.net/spell.ok307
-rw-r--r--share/doc/smm/Makefile28
-rw-r--r--share/doc/smm/Title203
-rw-r--r--share/doc/usd/00.contents262
-rw-r--r--share/doc/usd/18.msdiffs/Makefile7
-rw-r--r--share/doc/usd/18.msdiffs/ms.diffs287
-rw-r--r--share/doc/usd/19.memacros/Makefile7
-rw-r--r--share/doc/usd/19.memacros/intro.me2347
-rw-r--r--share/doc/usd/20.meref/Makefile7
-rw-r--r--share/doc/usd/20.meref/ref.me2384
-rw-r--r--share/doc/usd/Makefile24
-rw-r--r--share/doc/usd/Title120
267 files changed, 65075 insertions, 0 deletions
diff --git a/share/doc/Makefile b/share/doc/Makefile
new file mode 100644
index 0000000..a0e5bb6
--- /dev/null
+++ b/share/doc/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/5/93
+
+SUBDIR= psd smm usd
+
+all depend lint tags:
+
+.include <bsd.subdir.mk>
diff --git a/share/doc/iso/README b/share/doc/iso/README
new file mode 100644
index 0000000..7f3c679
--- /dev/null
+++ b/share/doc/iso/README
@@ -0,0 +1,3 @@
+The documentation contained here is horribly out of
+date and not to be trusted. However, it's probably
+better than nothing at all.
diff --git a/share/doc/iso/ucb/addr.nr b/share/doc/iso/ucb/addr.nr
new file mode 100644
index 0000000..57eff30
--- /dev/null
+++ b/share/doc/iso/ucb/addr.nr
@@ -0,0 +1,155 @@
+.NC "NSAP Addresses & Routing"
+.sh 1 "OSI Address Formats"
+.pp
+ARGO supports an ISO address family, AF_ISO, in addition to the
+DoD Internet address family, AF_INET.
+Addresses in the family AF_ISO
+take the form described by ISO 8348/DAD2, which is an addendum to the
+OSI network service standard that describes network layer addressing.
+.sh 2 "ISO 8348/DAD2 Tutorial"
+.pp
+.\" FIGURE
+.\".so figs/osi_addr.nr
+.so ../wisc/figs/addrfmt.nr
+.CF
+shows the
+format of an OSI NSAP-address.
+The address has two major parts: the initial domain part
+(IDP) and the domain specific part (DSP). The IDP is further divided into
+two parts: the authority and format identifier (AFI) and the
+initial domain identifier (IDI). The AFI specifies the format of the
+IDI, the authority responsible for allocating values of the IDI, and
+the syntax of the DSP. The IDI specifies the network addressing domain
+from which DSP values are allocated, and the authority responsible for
+allocating DSP values.
+.sh 2 "Supported Formats"
+.pp
+ARGO supports three types of ISO NSAP-addresses:
+one type with AFI 37(hex) and two types with AFI 47(hex).
+.sh 3 "AFI 37"
+.pp
+This value of the AFI defines the IDI to be an X.121 address or DTE
+address.
+The DTE address is encoded in binary coded decimal.
+The DSP syntax is binary.
+This form is intended to be used when communicating
+across a public data network (PDN).
+The ARGO software and documentation
+refer to this type of NSAP-address as a
+\*(lqtype 37.\*(rq
+address.
+.sh 3 "AFI 47"
+.pp
+The value of 47 for the AFI defines the IDI to be a 4 digit International
+Code Designator (ICD) allocated according to ISO 6523.
+ARGO supports two
+ICD values.
+.sh 4 "ICD 0004"
+.pp
+The ICD value of 0004 is assigned to OSINET,
+an experimental OSI network overseen by
+the National Institute of Standards and Technology.\**
+.(f
+\** formerly the National Bureau of Standards
+.)f
+When this style of NSAP-address
+is used,
+the DSP is divided into four parts: an organization identifier (2 bytes),
+a subnet identifier (2 bytes),
+an SNPA-part (4-8 bytes), and
+an NSAP selector (1 byte).
+The use of these fields is defined by the OSINET steering committee.
+This type of address is known as an
+\*(lqOSINET\*(rq
+address.
+.sh 4 "ICD 0006"
+.pp
+The ICD value of 0006 is assigned to the Department of Defense (DoD).
+In ARGO, NSAP-addresses with an ICD value of 0006
+are of the format defined in RFC 986, a proposal for embedding DARPA Internet
+addresses within an OSI NSAP-address.
+In this case, the DSP takes the form:
+version (1 byte), DARPA Internet Address (4 bytes), upper layer protocol
+identifier (1 byte).
+This is called an
+\*(lqrfc986\*(rq
+address.
+.sh 1 "Internal Representation"
+.pp
+Internally, an NSAP address takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s s.
+struct iso_addr {
+.T&
+l l l l l.
++u_char+isoa_afi;+/* authority &
++++format id */
++union+{+
+++struct addr_37+addr_37;+/* x.121 */
+++struct addr_osinet+addr_osinet;+/* OSINET */
+++struct addr_rfc986+addr_rfc986;+/* Internet*/
++}+isoa_u;
++u_char+isoa_len;+/* length */
+}
+.TE
+\fR
+.)b
+The field \fIisoa_afi\fR contains the AFI for the address.
+The union
+\fIisoa_u\fR contains the remainder of the address.
+The length of the entire address (the AFI, IDI, and DSP) is
+stored in \fIisoa_len\fR.
+.sh 1 "Network Layer Routing"
+.pp
+Routing at the network layer is performed by the
+routing procedure \fIrtalloc()\fR as described in Chapter 5.
+\fIRtalloc()\fR was designed for use in the DoD Internet
+domain.
+An unfortunate
+effect of this is that routing decisions are based upon either the
+entire NSAP address or the network portion of the address.
+The problem is defining the network portion of an NSAP address.
+The location and extent of the
+network portion of an NSAP address depends on the
+style of NSAP address.
+This decision is made by the function \fIiso_netof()\fR.
+.sh 2 "Network Portion of Type 37 Addresses"
+.pp
+There is no network portion of an X.121 address.
+In ARGO, the network portion of a type 37 address
+is defined to be just the AFI.
+The obvious consequence of this is that all type 37 addresses will
+match all other type 37 addresses
+in a network-portion comparison.
+.sh 2 "Network Portion of OSINET Addresses"
+.pp
+The network portion of an OSINET address is the organization identifier and
+the subnet identifier.
+.sh 2 "Network Portion of RFC986 Addresses"
+.pp
+The network portion of an RFC986 address is the network portion of the
+embedded DARPA Internet address.
+ARGO does not support subnetting, a method of subdividing Internet addresses.
+.sh 1 "NSAP Address / Subnetwork Point of Attachment Mapping"
+.pp
+In order to transmit a packet on a real subnetwork, the destination
+NSAP address
+must be mapped to an SNPA address.
+An SNPA address is the real, "hardware" address
+of a system on a network.
+This address corresponds to the 6 byte Ethernet or Token Ring
+Adapter address,
+or to the DTE address, which may be up to 7 bytes
+long (14 decimal digits).
+.pp
+A table, \fIsnpa_cache\fR, is kept in the kernel which contains the
+translation between NSAP-addresses and SNPA-addresses.
+This table is used by \fIiso_snparesolve()\fR whenever a
+datagram must be dispatched.
+The table is maintained by the ISO ES-IS protocol entity.
+Entries can be added and deleted
+by the user program \fIclnlutil(8)\fR and
+by the CONS entity.
diff --git a/share/doc/iso/ucb/def.nr b/share/doc/iso/ucb/def.nr
new file mode 100644
index 0000000..9d8e8b8
--- /dev/null
+++ b/share/doc/iso/ucb/def.nr
@@ -0,0 +1,144 @@
+.NC "Definitions"
+.sh 1 "General Terms"
+.ip "Kernel" 5
+The source code or binary module for the Acis Operating System
+(also known as AOS and IBM/4.3).
+.ip "User process" 5
+An instance of a program that is
+running in unprivileged mode, in the unprivileged address space
+commonly known as "user address space", in other words, not
+part of the kernel.
+.ip "IPC" 5
+Interprocess communication, the mechanism by which two different
+user processes send messages to each other.
+.ip "Unix, AOS" 5
+ACIS Operating System, the special testing release of Berkeley Unix 4.4BSD.
+.ip "PCB, pcb" 5
+Protocol control block. Each instance of a protocol machine
+keeps status information, addresses, and in some cases queues
+in a pcb for each connection or socket.
+.ip "Domain" 5
+In the Berkeley Unix environment, a domain is an abstract entity which
+comprises a network architecture, addressing scheme, address format,
+network protocols, and transport protocols.
+.sh 1 "Transport Layer Terms"
+.ip "ISO 8073"
+ISO Draft International Standard 8073, Transport Protocol Specification
+.ip "TP" 5
+The collection of transport
+classes that have been implemented in ARGO, classes 0 and 4.
+Also means the ARGO implementation of TP.
+.ip "TP 0" 5
+Transport class 0.
+.ip "TP 4" 5
+Transport class 4.
+.ip "Transport entity" 5
+Software or hardware that implements the elements of procedure
+described in ISO 8073.
+.ip "Transport user" 5
+User process that makes use of the services
+provided by a transport entity.
+.ip "Transport service interface" 5
+The syntax and semantics of the set of procedures, functions, and system calls
+that are invoked by a transport user,
+through which the services of the transport entity are delivered.
+.ip "TPDU" 5
+Transport protocol data unit, a packet that is
+passed from one transport entity to another.
+.ip "TSDU" 5
+Transport service data unit, the logical unit of data that is
+passed from a transport entity to a transport user, or from
+a transport user to a transport entity.
+.ip "CR TPDU" 5
+Connection request TPDU.
+.ip "CC TPDU" 5
+Connection confirm TPDU.
+.ip "DR TPDU" 5
+Disconnect request TPDU.
+.ip "DC TPDU" 5
+Disconnect confirm TPDU.
+.ip "DT TPDU" 5
+Normal data TPDU.
+.ip "XPD TPDU" 5
+Expedited data TPDU.
+.ip "AK TPDU" 5
+Normal data acknowledgment TPDU.
+.ip "XAK TPDU" 5
+Expedited data acknowledgment TPDU.
+.ip "ER TPDU" 5
+Error TPDU.
+.sh 1 "Network Layer Terms"
+.ip "ISO 8473"
+ISO Draft International Standard 8473, connectionless network protocol.
+.ip "CONS"
+Connection Oriented Network Service.
+.ip "COSNS"
+Connection Oriented Sub-Network Service.
+.ip "CLNS"
+Connectionless Network Service.
+.ip "CLNP"
+Connectionless Network Protocol, or ISO 8473.
+.ip "Network Entity"
+Software or hardware that implements the elements of procedure described
+in ISO 8473.
+.ip "Network Service User"
+Software components that make use of the services provided by a network
+entity.
+.ip "Network Service Provider"
+Software components that provide the services of a network entity.
+.ip "NSAP"
+Network Service Access Point. The point at which the OSI network service
+is made available to the network service user by the network service
+provider.
+.ip "NSAP address"
+Information that the network service provider needs to identify an
+NSAP. The source and destination address fields of a CLNP packet
+are NSAP addresses.
+.ip "ES"
+End system. A system running the complete suite of OSI protocols which can
+act as an end point for communication.
+.ip "IS"
+Intermediate system. A system running the OSI layers 1, 2, and 3 which
+can act only as a packet router.
+.ip "SNPA"
+The Subnetwork Point of Attachment is the point where a \fIreal\fR
+end or intermediate system is attached to a \fIreal\fR subnetwork.
+.ip "SNPA address"
+Information that a \fIreal\fR subnetwork needs to identify a \fIreal\fR end
+or intermediate system. This is commonly referred to as the hardware address.
+.ip "NPDU"
+Network Protocol Data Unit. The unit of data which is exchanged between
+network entities.
+.ip "DT NPDU"
+Normal data NPDU.
+.ip "ER NPDU"
+Error report NPDU.
+.ip "Initial NPDU"
+A NPDU carrying the whole of the user data from an N-UNITDATA request.
+.ip "Derived NPDU"
+A NPDU whose fields are identical to those of an initial NPDU, except that it
+carries only a segment of the user data from an N-UNITDATA request.
+.ip "Segment"
+A distinct unit of data consisting of part or all of the user data provided
+in the N-UNITDATA request and delivered in the N-UNITDATA indication.
+.ip "Segmentation"
+The act of generating two or more derived NPDUs from an initial or derived
+NPDU.
+.ip "Fragment"
+A DoD Internet Protocol term with the same meaning as "segment". Used
+synonymously with "segment."
+.ip "Fragmentation"
+A DoD Internet Protocol term with the same meaning as "segmentation". Used
+synonymously with "segmentation."
+.ip "Reassembly"
+The act of regenerating an initial NPDU from two or more derived NPDUs.
+.ip "MTU"
+Maximum transmission unit. The maximum size of a packet that can be
+transmitted on a medium or through a protocol.
+For example, the MTU of the TP protocol is 8192 bytes, the MTU
+of an Ethernet device is 1500 bytes, and the MTU of the OSI Network
+service is 512 bytes.
+.ip "Network interface"
+The device used to attach a computer to a network, for example,
+an Ethernet adapter, or a Token Ring adapter.
+This terminology is inherited from BSD Unix.
diff --git a/share/doc/iso/ucb/intro.nr b/share/doc/iso/ucb/intro.nr
new file mode 100644
index 0000000..196b7ae
--- /dev/null
+++ b/share/doc/iso/ucb/intro.nr
@@ -0,0 +1,72 @@
+.NC "Introduction"
+.sh 1 "Introduction"
+.pp
+This document describes the usage of the ISO
+transport and network layers written for the ACIS Operating System,
+the IBM ACIS port of Berkeley 4.3 Unix\**
+.(f
+\** Unix is a registered trademark of AT&T.
+.)f
+for the IBM RT PC,
+hereafter called AOS, as modified by UC Berkeley.
+This document describes work in progress and is an extremely
+hasty job of editing an earlier document written by colleagues
+at the University of Wisconsin.
+It is to be regarded as an emergency manual prepared for testers
+at NIST and should not be redistributed further.
+As such, there are philosophical
+statements that Berkeley fundamentally disagrees with, which
+we do not presently have the time to rip out.
+Collectively, this work is called the Wisconsin ARGO kernel.
+The ARGO kernel supports
+the connection-oriented ISO transport service (COTS), the
+ISO connectionless network service (CLNS)
+and a
+connection-oriented network service (CONS).
+The COTS is provided by the ISO transport protocol TP,
+ISO 8073 Revised.
+The CLNS is provided by the connectionless network protocol,
+ISO 8473.
+The CONS is provided by the X.25 protocols.
+The ARGO implementation of the CONS is not a complete
+ISO CONS, but contains enough of the CONS to support
+the COTS and the CLNS (in the latter case, the CONS can be
+viewed as a subnetwork service).
+.pp
+The purposes of this document are
+.ip "1) "
+to describe the transport service and the software interface
+between the user and provider of this service,
+.ip "2) "
+to describe the network service and the software interface it
+provides.
+.pp
+It is assumed that the reader is familiar with the \fBC\fR
+programming language,
+with Unix conventions, and with the ISO specifications listed in Appendix A.
+.sh 1 "Organization"
+.pp
+This document is composed of several chapters.
+Chapter One contains this introduction. Chapter Two presents a
+definition of terms and phrases used throughout the document.
+Chapter Three describes the transport service interface, which is
+the interface between the transport protocol implementation software and the transport user software.
+Chapter Four describes the network service interface, and the interface
+above and below the network layer.
+Chapter Five explains the format of an OSI address.
+Chapter Six describes
+the architecture of the interprocess communication support in the
+kernel, which to a large degree mandates
+the design of a protocol implementation for a 4.3 Unix kernel.
+.\" Appendix A is a list of the applicable ISO standards.
+.\" The manual pages relevant to the transport and network layers
+.\" are included as Appendix B.
+.pp
+Several conventions are followed in this document.
+All procedure names and system call names are followed
+by a pair of parentheses, for example,
+.i read ().
+References to manual pages consist of the name of the
+manual page, followed by the section in which
+the man page is found:
+.i read (2).
diff --git a/share/doc/iso/ucb/ipc.nr b/share/doc/iso/ucb/ipc.nr
new file mode 100644
index 0000000..6944970
--- /dev/null
+++ b/share/doc/iso/ucb/ipc.nr
@@ -0,0 +1,331 @@
+.NC "The Design of Unix IPC"
+.sh 1 "General"
+.pp
+The ARGO implementation of
+TP and CLNP was designed to fit into the AOS
+kernel
+as easily as possible.
+All the standard protocol hooks are used.
+To understand the design, it is useful to have
+read
+Leffler, Joy, and Fabry:
+\*(lq4.2 BSD Networking Implementation Notes\*(rq July 1983.
+This section describes the
+design of the IPC support in the AOS kernel.
+.sh 1 "Functional Unit Overview"
+.pp
+The
+AOS
+kernel
+is a monolithic program of considerable size and complexity.
+The code can be separated into parts of distinct function,
+but there are no kernel processes per se.
+The kernel code is either executed on behalf of a user
+process, in which case the kernel was entered by a system call,
+or it is executed on behalf of a hardware or software interrupt.
+The following sections describe briefly the major functional units
+of the kernel.
+.\" FIGURE
+.so ../wisc/figs/func_units.nr
+.CF
+shows the arrangement of these kernel units and
+their interactions.
+.sh 2 "The file system."
+.pp
+.sh 2 "Virtual memory support."
+.pp
+This includes protection, swapping, paging, and
+text sharing.
+.sh 2 "Blocked device drivers (disks, tapes)."
+.pp
+All these drivers share some minor functional units,
+such as buffer management and bus support
+for the various types of busses on the machine.
+.sh 2 "Interprocess communication (IPC)."
+.pp
+This includes
+support for various protocols,
+buffer management, and a standard interface for inter-protocol
+communication.
+.sh 2 "Network interface drivers."
+.pp
+These drivers are closely tied to the IPC support.
+They use the IPC's buffer management unit rather
+than the buffers used by the blocked device drivers.
+The interface between these drivers and the rest of the kernel
+differs from the interface used by the blocked devices.
+.sh 2 "Tty driver"
+.pp
+This is terminal support, including the user interface
+and the device drivers.
+.sh 2 "System call interface."
+.pp
+This handles signals, traps, and system calls.
+.sh 2 "Clock."
+.pp
+The clock is used in various forms by many
+other units.
+.sh 2 "User process support (the rest)."
+.pp
+This includes support for accounting, process creation,
+control, scheduling, and destruction.
+.pp
+.sh 2 "IPC"
+.pp
+The major functional unit that supports IPC
+can be divided into the following smaller functional
+units.
+.sh 3 "Buffer management."
+.pp
+All protocols share a pool of buffers called \fImbufs\fR.
+The internal structure has changed considerably since 4.3:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct mbuf {
+.T&
+l l l l.
++struct mbuf+*m_next;+/* next buffer in chain */
++struct mbuf+*m_act;+/* link in 2-d structure */
++u_long+m_len;+/* amount of data */
++char *+m_data;+/* location of data */
++short+m_type;+/* type of data */
++short+m_flags;+/* note if EOR, Packet HDR, Ext. stored */
++++/* If packet header add: */
++int+m_pkthdr.len;+/* total packet length */
++struct ifnet+*m_pkthdr.recvif;+/* rcv interface */
++++/* If external storage add: */
++char +*m_ext.ext_buf;+/* start of buffer */
++void+(*m_ext.ext_free)();+/* free routine if not the usual */
++u_int+m_ext.ext_size;+/* size of buffer, for ext_free */
++++/* For non external */
++char+m_dat[depending];+/* done by unions, etc. */
+};
+.TE
+\fR
+.)b
+.pp
+There are two forms of mbufs - with and without external storage.
+Small ones are 128 octets in 4.4BSD.
+The data in these mbufs are located
+in the mbuf structure itself.
+Large mbufs, called \fIclusters\fR, are page-sized
+and page-aligned.
+They may be \*(lqcopied\*(rq by multiply mapping the pages they occupy.
+They consist of a page of memory plus a small mbuf structure
+whose fields are used
+to link clusters into chains, but whose \fIm_dat\fR array is
+not used.
+The \fIm_data\fR field of the structure
+is a pointer to the active data in all cases.
+The remainder of the description in the argo document
+is generally obsolete, and I am merely deleting the
+rest of it at this point.
+.sh 3 "Routing."
+.pp
+Routing decisions in the kernel are made by the procedure \fIrtalloc()\fR.
+This procedure will scan the kernel routing tables (stored in mbufs)
+looking for a route.
+The argo document here also is quite obsolete.
+We now keep a tree structure routing table,
+and do matching under masks.
+The structure for the routing entry contains tree-related
+pointers (parent, l-r child for internal nodes, mask and address
+for external nodes), and may be completely revised again
+to make use of patricia trees.
+.pp
+If a route is not found, then a default route is used (if present).
+.pp
+If a route is found, the entity which called \fIrtalloc()\fR can use information
+from the \fIrtentry\fR structure to dispatch the datagram. Specifically, the
+datagram is queued on the interface identified by the interface
+pointer \fIrt_ifp\fR.
+.sh 3 "Socket code."
+.pp
+This is the protocol-independent part of the IPC support.
+Each communication endpoint (which may or may not be associated
+with a connection) is represented by the following structure:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct socket {
+.T&
+l l l l.
++short+so_type;+/* type, e.g. SOCK_DGRAM */
++short+so_options;+/* from socket call */
++short+so_linger;+/* time to linger @ close */
++short+so_state;+/* internal state flags */
++caddr_t+so_pcb;+/* network layer pcb */
++struct protosw+*so_proto;+/* protocol handle */
++struct socket+*so_head;+/* ptr to accept socket */
++struct socket+*so_q0;+/* queue of partial connX */
++short+so_q0len;+/* # partials on so_q0 */
++struct socket+*so_q;+/* queue of incoming connX */
++short+so_qlen;+/* # connections on so_q */
++short+so_qlimit;+/* max # queued connX */
++struct sockbuf+{
+++short+sb_cc;+/* actual chars in buffer */
+++short+sb_hiwat;+/* max actual char count */
+++short+sb_mbcnt;+/* chars of mbufs used */
+++short+sb_mbmax;+/* max chars of mbufs to use */
+++short+sb_lowat;+/* low water mark (not used yet) */
+++short+sb_timeo;+/* timeout (not used ) */
+++struct mbuf+*sb_mb;+/* the mbuf chain */
+++struct proc+*sb_sel;+/* process selecting */
+++short+sb_flags;+/* flags, see below */
++} so_rcv, so_snd;
++short+so_timeo;+/* connection timeout */
++u_short+so_error;+/* error affecting connX */
++short+so_oobmark;+/* oob mark (TCP only) */
++short+so_pgrp;+/* pgrp for signals */
+}
+.TE
+\fR
+.)b
+.pp
+The socket code maintains a pair of queues for each socket,
+\fIso_rcv\fR and \fIso_snd\fR.
+Each queue is associated with a count of the number of characters
+in the queue, the maximum number of characters allowed to be put
+in the queue, some status information (\fIsb_flags\fR), and
+several unused fields.
+For a send operation, data are copied from the user's address space
+into chains of mbufs.
+This is done by the socket module, which then calls the underlying
+transport protocol module to place the data
+on the send queue.
+This is generally done by
+appending to the chain beginning at \fIsb_mb\fR.
+The socket module copies data from the \fIso_rcv\fR queue
+to the user's address space to effect a receive operation.
+The underlying transport layer is expected to have put incoming
+data into \fIso_rcv\fR by calling procedures in this module.
+.in -5
+.sh 3 "Transport protocol management."
+.pp
+All protocols and address types must be \*(lqregistered\*(rq in a
+common way in order to use the IPC user interface.
+Each protocol must have an entry in a protocol switch table.
+Each entry takes the form:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct protosw {
+.T&
+l l l l.
++short+pr_type;+/* socket type used for */
++short+pr_family;+/* protocol family */
++short+pr_protocol;+/* protocol # from the database */
++short+pr_flags;+/* status information */
++++/* protocol-protocol hooks */
++int+(*pr_input)();+/* input (from below) */
++int+(*pr_output)();+/* output (from above) */
++int+(*pr_ctlinput)();+/* control input */
++int+(*pr_ctloutput)();+/* control output */
++++/* user-protocol hook */
++int+(*pr_usrreq)();+/* user request: see list below */
++++/* utility hooks */
++int+(*pr_init)();+/* initialization hook */
++int+(*pr_fasttimo)();+/* fast timeout (200ms) */
++int+(*pr_slowtimo)();+/* slow timeout (500ms) */
++int+(*pr_drain)();+/* free some space (not used) */
+}
+.TE
+\fR
+.)b
+.pp
+Associated with each protocol are the types of socket
+abstractions supported by the protocol (\fIpr_type\fR), the
+format of the addresses used by the protocol (\fIpr_family\fR),
+the routines to be called to perform
+a standard set of protocol functions (\fIpr_input\fR,...,\fIpr_drain\fR),
+and some status information (\fIpr_flags\fR).
+The field pr_flags keeps such information as
+SS_ISCONNECTED (this socket has a peer),
+SS_ISCONNECTING (this socket is in the process of establishing
+a connection),
+SS_ISDISCONNECTING (this socket is in the process of being disconnected),
+SS_CANTSENDMORE (this socket is half-closed and cannot send),
+SS_CANTRCVMORE (this socket is half-closed and cannot receive).
+There are some flags that are specific to the TCP concept
+of out-of-band data.
+A flag SS_OOBAVAIL was added for the ARGO implementation, to support
+the TP concept of out-of-band data (expedited data).
+.sh 3 "Network Interface Drivers"
+.pp
+The drivers for the devices attaching a Unix machine to a network
+medium share a common interface to the protocol
+software.
+There is a common data structure for managing queues,
+not surprisingly, a chain of mbufs.
+There is a set of macros that are used to enqueue and
+dequeue mbuf chains at high priority.
+A driver
+delivers an indication to a protocol entity when
+an incoming packet has been placed on a queue by
+issuing a
+software
+interrupt.
+.sh 3 "Support for individual protocols."
+.pp
+Each protocol is written as a separate functional unit.
+Because all protocols share the clock and the mbuf pool, they
+are not entirely insulated from each other.
+The details of TP are described in a section that
+follows.
+.\"*****************************************************
+.\" FIGURE
+.so ../wisc/figs/unix_ipc.nr
+.pp
+.CF
+shows the arrangement of the IPC support.
+.pp
+The AOS
+IPC was designed for DoD Internet protocols, all of
+which run over DoD IP.
+The assumptions that DoD Internet is the domain
+and that DoD IP is the network layer
+appear in the code and data structures in numerous places.
+An example is that the transport protocols all directly call
+IP routines.
+There are no hooks in the data structures through
+which the transport layer can choose a network level protocol.
+Another example is that headers are assumed to
+fit in one small mbuf (112 bytes for data in AOS).
+Another example is this:
+It is assumed in many places that buffer space is managed
+in units of characters or octets.
+The user data are copied from user address space into the kernel mbufs
+amorphously
+by the socket code, a protocol-independent part of the kernel.
+This is fine for a stream protocol, but it means that a
+packet protocol, in order to \*(lqpacketize\*(rq the data,
+must perform a memory-to-memory copy
+that might have been avoided had the protocol layer done the original
+copy from user address space.
+Furthermore, protocols that count credit in terms of packets or
+buffers rather than characters do not work efficiently because
+the computation of buffer space is not in the protocol module,
+but rather it is in the socket code module.
+This list of examples is not complete.
+.pp
+To summarize, adding a new transport protocol to the kernel consists of
+adding entries to the tables in the protocol management
+unit,
+modifying the network interface driver(s) to recognize
+new network protocol identifiers,
+adding the
+new system calls to the kernel and to the user library,
+and
+adding code modules for each of the protocols,
+and correcting deficiencies in the socket code,
+where the assumptions made about the nature of
+transport protocols do not apply.
+.i
+(Touchy touchy, aren't we!?! -- Sklower)
diff --git a/share/doc/iso/ucb/macros.nr b/share/doc/iso/ucb/macros.nr
new file mode 100644
index 0000000..88ec2d3
--- /dev/null
+++ b/share/doc/iso/ucb/macros.nr
@@ -0,0 +1,50 @@
+.\"
+.\" Macro to initialize chapter macros
+.\"
+.de IC
+.nr CN 0 1
+..
+.\"
+.\" Macro to begin new chapter
+.\"
+.de NC
+.he 'ARGO user\'s Guide, Berkeley Hack Job''Chapter \\n+(CN'
+.bp
+.sh 0 "_" 1 1 1 1 1 1
+.sz +2
+.(l C
+CHAPTER \\n(CN
+
+\fB\\$1\fR
+.)l
+.sp 1
+.(x
+Chapter \\n(CN \\$1
+.)x
+.sz -2
+..
+.\"
+.\" Figure conventions:
+.\" 1) do .so of figure source - figure reg incremented here
+.\" 2) make references to figure via CF
+.\"
+.\"
+.\" Macro to initialize figure register
+.\"
+.de IF
+.nr FG 0 1
+..
+.\"
+.\" Macro for current figure number
+.\"
+.de CF
+Figure \\n(FG
+..
+.\"
+.\" Define this macro to include section headings in table of contents
+.\"
+.de $0
+.(x
+Section \\$2 \\$1
+.)x
+..
diff --git a/share/doc/iso/ucb/net_serv.nr b/share/doc/iso/ucb/net_serv.nr
new file mode 100644
index 0000000..c24eebd
--- /dev/null
+++ b/share/doc/iso/ucb/net_serv.nr
@@ -0,0 +1,163 @@
+.NC "Network Service Interface"
+.sh 1 "Connectionless Network Service"
+.pp
+This section describes the interface to the ISO connectionless network service.
+There are really two interfaces to the CLNS: the internal interface
+and the IPC interface.
+The internal interface is based on
+procedure calls. It is used only within the kernel. The IPC interface
+allows a user process to access the CLNS directly. This is used only
+for testing and debugging purposes.
+.sh 2 "Primitives"
+.pp
+The CLNS is, by definition, connectionless. Therefore, there are no
+primitives associated with connection establishment or connection release.
+There is one primitive associated with data transfer: N-UNITDATA.
+The parameters to an N-UNITDATA request are: source NSAP address,
+destination NSAP address, quality of service, and user data.
+The parameters of an N-UNITDATA indication are identical to those of the
+request.
+In this implementation, the quality of service parameter is not supported.
+.sh 2 "Internal Interface"
+.pp
+Within the kernel, an N-UNITDATA request is effected by the procedure
+\fIclnp_output()\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+clnp_output(m0, isop, datalen, flags)
+.T&
+l l l.
+ +struct mbuf+*m0;+/* data */
+ +struct isopcb+*isop;+/* ISO protocol control block */
+ +int+datalen;+
+ +int+flags;+/* flags */
+.TE
+\fR
+.)b
+This procedure will construct a DT NPDU, route it, and transmit it on
+the appropriate subnetwork. \fIM0\fR is an mbuf chain containing the
+user data portion of the N-UNITDATA request. \fIIsop\fR is the ISO protocol
+control block previously allocated. \fIClnp_output\fR will use the following
+fields: \fIisop_laddr\fR, \fIisop_faddr\fR, \fIisop_route\fR, \fIisop_options\fR,
+\fIisop_optindex\fR, and \fIisop_clnpcache\fR.
+\fIDatalen\fR specifies the number of bytes of user data.
+The \fIflags\fR parameter will be discussed in a subsequent chapter.
+.pp
+An N-UNITDATA indication occurs when a DT NPDU arrives. The indication is
+generated by calling the appropriate upper layer protocol input routine.
+In the case of TP, the procedure \fItpclnp_input()\fR is called:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+tpclnp_input(m, src, dst, len)
+.T&
+l l l.
+ +struct mbuf+*m;+/* DT NPDU */
+ +struct iso_addr+*src;+/* source of packet */
+ +struct iso_addr+*dst;+/* destination of packet */
+ +int+len;+/* length of clnp header */
+.TE
+\fR
+.)b
+\fIM\fR contains the entire DT NPDU packet. \fILen\fR indicates the size
+of the CLNP header. In other words, the user data of the DT NPDU begins
+\fIlen\fR bytes into \fIm\fR. \fISrc\fR and \fIdst\fR indicate the
+source and destination NSAP addresses of the packet.
+.sh 3 "CLNP/Subnetwork Interface"
+.pp
+The design of the interface between the subnetwork and the CLNP is
+determined by the design of the Unix network interface drivers. CLNP
+follows the conventional mechanisms for exchanging packets with a network
+interface. See the section on Network Interface Drivers in Chapter Five
+for more information on these conventions.
+.sh 2 "IPC (\*(lqRaw\*(rq) Interface"
+.pp
+The IPC interface to the CLNS allows direct (called \*(lqraw\*(rq)
+access to CLNP.
+This interface is intended for testing and debugging only.
+Its use results in the
+transmission of CLNP datagrams with nonstandard identification fields.
+These raw packets may be rejected by a system not employing the same
+convention. See the section on network implementation for more information
+about the conventions.
+.pp
+In order to gain access to the raw interface
+a \fIsocket\fR, with address family AF_ISO and type SOCK_RAW must be created.
+With this socket in hand,
+the system calls \fIsendto()\fR and \fIrecvfrom()\fR can be used to
+transmit and receive raw CLNP datagrams.
+.sh 3 "Sending raw datagrams"
+.pp
+The format of the \fIsendto()\fR system call is:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+cc = sendto(s, msg, len, flags, to, tolen)
+.T&
+l l l.
+int+cc,s;
+char+*msg;
+int+len,flags;
+struct sockaddr+*to;
+int+tolen;
+.TE
+\fR
+.)b
+\fIS\fR is the socket previously created. \fIMsg\fR is a pointer to
+the data for the NPDU. CLNP will prepend a header to this data before
+transmission. \fILen\fR specifies the number of bytes of data. The
+\fIflags\fR parameter is unused and should be zero. \fITo\fR specifies the
+NSAP address to be used as the destination address. The size (in bytes)
+of \fIto\fR is given in \fItolen\fR. CLNP will automatically insert
+the source address based upon the route taken by the packet. The number of
+user data bytes transmitted is returned as \fIcc\fR. See \fIsendto(2)\fR
+for more information on this system call.
+.sh 3 "Receiving raw datagrams"
+.pp
+The format of the \fIrecvfrom()\fR system call is:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+cc = recvfrom(s, buf, len, flags, from, fromlen)
+.T&
+l l l.
+int+cc,s;
+char+*buf;
+int+len,flags;
+struct sockaddr+*from;
+int+*fromlen;
+.TE
+\fR
+.)b
+When used with a CLNP raw socket \fIs\fR, \fIrecvfrom()\fR will read a
+NPDU from the CLNS. If no packet is available, the call will block.
+\fIBuf\fR specifies a buffer of \fIlen\fR bytes into which the NPDU will
+be read. The entire packet, including the header, will be read into the
+buffer. The \fIflags\fR parameter is unused, and should be zero. If
+\fIfrom\fR is non-zero, the source address of the NPDU is filled in.
+\fIFromlen\fR is a value-result parameter, initialized to the size of
+the buffer associated with \fIfrom\fR, and modified on return to
+indicate the actual size of the address stored there. The total number
+of bytes received (header and data) is returned as \fIcc\fR.
+See \fIrecvfrom(2)\fR for more information about this system call.
+.sh 1 "Connection Oriented Network Service"
+.pp
+The ARGO Connection Oriented Network Service (CONS) is not a complete
+implementation of the
+OSI network service.
+It is that subset of the OSI network service that is used
+by ARGO Transport and by ARGO CLNP.
+.\" FIGURE
+.so ../wisc/figs/NS_primitives.nr
+.pp
+.CF
+shows which CONS service elements are provided.
diff --git a/share/doc/iso/ucb/program.nr b/share/doc/iso/ucb/program.nr
new file mode 100644
index 0000000..a431661
--- /dev/null
+++ b/share/doc/iso/ucb/program.nr
@@ -0,0 +1,42 @@
+.\"$Header: program.nr,v 1.1 88/12/05 18:10:57 nhall Exp $
+.\"$Source: /usr/argo/doc/kernel/RCS/program.nr,v $
+.\"
+.\"
+.\" FONT CONVENTIONS
+.\"
+.\" \fIprocedure()\fR
+.\" \fIsyscall()\fR
+.\" \fImanpage(3)\fR
+.\" \fIdata_structure_name\fR
+.\" \fC/file/or/directory/name\fR
+.\" \fC
+.\" section of code
+.\" \fR
+.\"
+.\"
+.\" LOOK FOR ALL CASES OF 'writing' (as in, "at this writing")
+.\" to be sure you've updated everything before distributing this!
+.\"
+.\"This file uses -me and tbl macros.
+.so macros.nr
+.\" .pn 1
+.IC
+.IF
+.\" .(l C
+.\" .sz 16
+.\" Berkeley's Hack at the first 6 chapters of
+.\" Wisconsin ARGO 1.0 Kernel Programmer's Guide for
+.\" Academic Operating Systems 4.3
+.\" .sz 8
+.\" .)l
+.he 'ARGO 1.0 Berkeley Revision User\'s Guide'''
+.fo '%''December 9, 1988'
+.\" .bp
+.so intro.nr
+.so def.nr
+.so trans_serv.nr
+.so net_serv.nr
+.so ipc.nr
+.so addr.nr
+.bp
+.xp
diff --git a/share/doc/iso/ucb/trans_serv.nr b/share/doc/iso/ucb/trans_serv.nr
new file mode 100644
index 0000000..2f6d156
--- /dev/null
+++ b/share/doc/iso/ucb/trans_serv.nr
@@ -0,0 +1,697 @@
+.NC "Transport Service Interface"
+.sh 1 "General"
+.pp
+It is assumed that the reader is acquainted with the
+set of system calls and library routines that
+compose the
+Berkeley
+Unix interprocess communication service (IPC).
+To every extent possible
+the ARGO transport service is provided by the same IPC mechanisms
+that support
+the transport-level services
+included in the
+AOS distribution.
+In some instances, the interface
+provided by AOS does not support
+the services required by ISO 8073,
+so system calls were added to support these services.
+It is felt that this is superior to modifying
+existing system calls, in order to avoid the
+recoding of existing Unix utilities.
+.pp
+What follows is a description of the system calls
+that are used to provide the transport service.
+According to Unix custom,
+the return value of a system call is 0
+if the call succeeds and -1 if
+the call fails for any reason.
+In the latter case,
+the global variable
+\fIerrno\fR contains more information
+about the error that caused the failure.
+In the descriptions of all the system calls for which
+this custom is followed,
+the return value is named
+\fIstatus\fR.
+.sh 1 "Connection establishment"
+.pp
+Establishing a TP connection is similar to
+establishing a connection using any other
+transport protocol supported by Unix.
+The same system calls are used, and the passive open
+is required.
+Some of the parameters to the system calls differ.
+.pp
+The following call creates a communication endpoint called a
+\fIsocket\fR.
+It returns
+a positive integer called a
+\fIsocket descriptor\fR,
+which
+will be a parameter in all communication primitives.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+s = socket( af, type, protocol )
+.T&
+l l l.
+ +int+s,af,type,protocol;
+.TE
+\fR
+.)b
+.pp
+The \fIaf\fR parameter describes the format of addresses
+used in this communication.
+Existing formats include AF_INET (DoD Internet addresses),
+AF_PUP (Xerox PUP-I Internet addresses), and AF_UNIX
+(addresses are Unix file names, for intra-machine IPC only).
+TP runs in either the Internet domain or the ISO domain (AF_ISO).
+When using the Internet domain, the network layer is the DoD Internet IP
+with Internet-style addresses. The ISO domain uses the ISO
+network service and ISO-style addresses\**.
+.(f
+\**ISO/DP 8348/DAD2 Addendum to the Network
+Service Definition Covering Network Layer Addressing.
+.)f
+Regardless of the address family used, an address takes the
+general form,
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr {
+.T&
+l l l l.
+ +u_char+sa_len;+/* length of sockaddr */
+ +u_char+sa_family;+/* address family */
+ +char+sa_data[14];+/* space for an address */
+}+
+.TE
+\fR
+.)b
+.lp
+.i
+A sockaddr is no longer required to be precisely 16 bytes long.
+The allocation of 14 bytes for sa_data is intended for backwards
+compatibility.
+.r
+.sp 1
+When viewed as an Internet address, it takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr_in {
+.T&
+l l l l.
+ +u_char+sin_len;+/* address length */
+ +u_char+sin_family;+/* address family */
+ +u_short+sin_port;+/* internet port */
+ +struct in_addr+sin_addr;+/* network addr A.B.C.D */
+ +char+sin_zero[8];+/* unused */
+}
+.TE
+\fR
+.)b
+.sp 1
+When viewed as an ISO address, as supplied by the
+University of Wisconsin, it takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr_iso {
+.T&
+l l l l.
+ +u_char+siso_len;+/* address length */
+ +u_char+siso_family;+/* address family */
+ +u_short+siso_tsuffix;+/* transport suffix */
+ +struct iso_addr+siso_addr;+/* ISO NSAP addr */
+ +char+siso_zero[2];+/* unused */
+}
+.TE
+\fR
+.)b
+The address described by a \fIsockaddr_iso\fR structure
+is a TSAP-address (transport service access point address).
+It is made of an NSAP-address (network service access point address)
+and a TSAP selector (also called a transport suffix or
+transport selector, hereafter called a TSEL).
+The structure \fIsockaddr_iso\fR contains a 2-byte TSEL.
+This is for compatibility with Internet addressing.
+ARGO supports
+TSELs of length 1-64 bytes.
+TSELs of any length other than 2
+are called \*(lqextended TSELs\*(rq.
+They are described in detail in the section \fB\*(lqExtended TSELs\*(rq\fR.
+If extended TSELs are not requested, 2-byte TSELs are used by default.
+.pp
+Refer to Chapter Five for more information about ISO NSAP-addresses.
+.pp
+.i
+It is our intent at Berkeley to revamp the sockaddr_iso
+to use a more natural and uniform model, for ISO addresses.
+We cannot guarantee this modification to be complete by the
+time we are ready to have something for NIST to test.
+We hope to remove this notion of extended TSEL's as soon as
+possible, certainly by formal beta testing of 4.4.
+.r
+Since sockaddr can be 108 bytes long without breaking anything
+in the current Berkeley kernel, we should be able to eliminate
+extended TSEL's entirely by
+providing a sockaddr_iso along the lines of:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr_iso {
+.T&
+l l l l.
+ +u_char+siso_len;+/* address length */
+ +u_char+siso_family;+/* address family */
+ +u_char+siso_slen;+/* session suffix length */
+ +u_char+siso_tlen;+/* transport suffix length */
+ +u_char+siso_nlen;+/* nsap length */
+ +char+siso_data[22];+/* minimum nsap + tsel */
+}
+.TE
+\fR
+.)b
+.pp
+The \fItype\fR parameter in the \fIsocket()\fR call
+distinguishes
+datagram protocols, stream protocols, sequenced
+packet protocols, reliable datagram protocols, and
+"raw" protocols (in other words, the absence of a transport protocol).
+Unix provides manifest named constants for each of these types.
+TP supports the sequenced packet protocol abstraction, to which
+the manifest constant SOCK_SEQPACKET applies.
+.pp
+The \fIprotocol\fR
+parameter is an integer that identifies the protocol to be used.
+Unix provides a database of protocol names and their associated
+protocol numbers.
+Unix also provides user-level tools
+for searching the database.
+The tools take the form of library routines.
+A protocol number for TP has been chosen
+by the Internet NIC to allow TP to run in the Internet domain, and this
+has been added to the Unix network protocol database.
+The standard Internet database tools that serve TCP users
+can
+also serve users of TP
+in the Internet domain, if the TP protocol number is added to the
+proper Internet database file,
+\fC/etc/protocols\fR.
+This change must be made for TP to run in either the Internet or
+in the ISO domain.
+The ARGO package contains a set of tools and a database
+for use with TP in the ISO domain.
+This set of tools is described in the manual pages
+\fIisodir(5)\fR and
+\fIisodir(3)\fR.
+.pp
+When a socket is created, it is not given an address.
+Since a socket cannot be reached by a remote entity unless it has an address,
+the user must request that a socket be given an address by
+using the \fIbind()\fR system call:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = bind( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+The address is expected to be in the format specified by the
+\fIaf\fR parameter to the \fIsocket()\fR
+call that yielded the socket descriptor \fIs\fR.
+If the user
+passes an address parameter with a zero-valued transport suffix,
+the transport layer
+assigns an unused 2-byte transport selector.
+This is a 4.3 Unix convention; it is not part of any ISO standard.
+.pp
+The \fIconnect()\fR system call effects an active open.
+It is used to establish a connection with an entity that is
+passively waiting for connection requests, and whose
+transport address is known.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = connect( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+The first parameter is a socket descriptor.
+The \fIaddr\fR parameter is a transport address in the format
+specified by the \fIaf\fR parameter to the \fIsocket()\fR
+call that yielded the socket descriptor \fIs\fR.
+.pp
+A passive open is accomplished with two system calls,
+\fIlisten()\fR followed by \fIaccept()\fR.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = listen( s, queuelen )
+.T&
+l l l.
+ +int+s;
+ +int+queuelen;
+.TE
+\fR
+.)b
+.pp
+The \fIqueuelen\fR argument specifies the maximum
+number of pending connection
+requests that will be queued for acceptance by this user.
+Connections are then accepted by the
+system call \fIaccept()\fR.
+There is no way to refuse connections.
+The functional equivalent of connection
+refusal is accomplished by accepting a connection and immediately
+disconnecting.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+new_s = accept( s, addr, addrlen )
+.T&
+l l l.
+ +int+new_s, s;
+ +struct sockaddr+*addr;
+ +int+*addrlen;
+.TE
+\fR
+.)b
+.pp
+The \fIaccept()\fR call completes the connection
+establishment. If a connection request from a prospective peer
+is pending on the socket described by \fIs\fR, it is removed and
+a new socket is created for use with this connection.
+A socket descriptor for the new socket is returned by the
+system call.
+If no connection requests are pending, this call blocks.
+If the \fIaccept()\fR call fails, -1 is returned.
+The transport address of the entity requesting the connection
+is returned in the \fIaddr\fR parameter, and the length
+of the address is returned in the \fIaddrlen\fR parameter.
+The address associated with the new socket is inherited
+from the socket on which the \fIlisten()\fR and \fIaccept()\fR were performed.
+.pp
+It is possible for the \fIaccept()\fR call to be interrupted
+by an asynchronous event such as the arrival of expedited
+data.
+When system calls are interrupted, Unix returns the value -1
+to the caller and puts the constant
+EINTR in the global variable \fIerrno\fR.
+This can create problems with the system call \fIaccept()\fR.
+In the case of incoming expedited data, the interruption does
+not indicate a problem, but the data may have arrived before
+the caller has received the new socket descriptor, which is the
+socket descriptor on which the expedited data are to be received.
+In order to prevent this problem from occurring, the caller must
+prevent the issuance of asynchronous indications until the
+\fIaccept()\fR
+call has returned.
+Asynchronous indications are discussed below, in
+the section titled
+"Indications from the transport layer to the transport user".
+.pp
+It is possible to discover the
+address bound to a
+socket with the
+\fIgetsockname()\fR system call.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getsockname( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+*addrlen;
+.TE
+\fR
+.)b
+.pp
+If the socket has a peer, that is, it is connected,
+the system call
+\fIgetpeername()\fR
+is used to discover the peer's address.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getpeername( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+*addrlen;
+.TE
+\fR
+.)b
+.lp
+The names returned by
+\fIgetsockname()\fR and \fIgetpeername()\fR
+do not contain extended TSELs.
+Extended TSELs can be retrieved with
+the \fIgetsockopt()\fR and
+\fIsetsockopt()\fR system calls, described below.
+.pp
+Unix supports several protocol-independent options
+and protocol-specific options
+associated with sockets.
+These options can be inspected and changed by using
+the \fIgetsockopt()\fR and
+\fIsetsockopt()\fR system calls.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getsockopt( s, level, option, value, valuelen )
+.T&
+l l l.
+ +int+s, level, option;
+ +char+*value;
+ +int+*valuelen;
+.TE
+\fR
+.)b
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = setsockopt( s, level, option, value, valuelen )
+.T&
+l l l.
+ +int+s, level, option;
+ +char+*value;
+ +int+valuelen;
+.TE
+\fR
+.)b
+.pp
+The \fIlevel\fR argument may indicate
+either
+that this option applies to sockets or that it applies to
+a specific protocol.
+The constants SOL_SOCKET, SOL_TRANSPORT, and SOL_NETWORK
+are possible values for the \fIlevel\fR argument.
+The \fIoption\fR argument is an integer that identifies
+the option chosen.
+.\" LIST THE OPTIONS HERE
+The options available to TP users provide the
+user with the ability to control various TP protocol options
+including but not limited to
+TP class, TPDU size negotiated, TPDU format used,
+acknowledgment and retransmission strategies.
+For a detailed list of the options, see the manual page \fItp(4p)\fR.
+.sh 1 "Extended TSELs"
+.pp
+ARGO supports TSELs
+of length 1 byte - 64 bytes for sockets bound to addresses in the
+AF_ISO address family.
+The ARGO user program uses the
+\fIgetsockopt()\fR
+and
+\fIsetsockopt()\fR
+system calls to
+discover and assign extended TSELs.
+.pp
+To create a socket with an extended TSEL,
+the process
+.ip \(bu 5
+opens a socket with \fCsocket(AF_ISO, SOCK_SEQPACKET, ISOPROTO_TP)\fR
+.ip \(bu 5
+binds an NSAP-address to the socket with \fIbind()\fR.
+The address bound may contain a 2-byte selector (\fIiso_tsuffix\fR).
+.ip \(bu 5
+uses \fIsetsockopt()\fR with the command TPOPT_MY_TSEL,
+to assign a TSEL to the socket.
+.ip \(bu 5
+calls \fIlisten(), connect()\fR, or any other appropriate system calls
+to use the socket as desired.
+.lp
+To connect to a transport entity that is bound to a TSAP-address with
+an extended TSEL, the
+process
+.ip \(bu 5
+opens a socket with \fCsocket(AF_ISO, SOCK_SEQPACKET, ISOPROTO_TP)\fR
+.ip \(bu 5
+uses \fIsetsockopt()\fR, with the command TPOPT_PEER_TSEL,
+to assign a PEER TSEL to the socket.
+This TSEL is used by the transport entity
+for all subsequent connect requests made on this socket,
+unless the peer TSEL is changed by another call to
+\fIsetsockopt()\fR employing the command TPOPT_PEER_TSEL.
+.lp
+To discover the TSEL of the peer of a connected socket,
+the process
+.ip \(bu 5
+uses \fIgetsockopt()\fR with the command TPOPT_PEER_TSEL.
+.lp
+To discover the TSEL of the socket's own address,
+the process
+.ip \(bu 5
+uses \fIgetsockopt()\fR with the command TPOPT_MY_TSEL.
+.sh 1 "Data transfer"
+.pp
+Earlier BSD-based systems have provided system calls for data transfer
+having bugs and semantics that are problematic for TP.
+These should be correct as presented in the test system.
+The problem was in the manner in which the kernel
+handled interrupted system calls.
+The send and receive primitives
+may be interrupted by signals.
+A signal is the mechanism used to indicate
+the presence of expedited data or out-of-band data.
+If the send primitive was interrupted before completion,
+the user could not determine how many octets of data were sent.
+All forms of the existing interface
+(\fIsend()\fR,
+\fIrecv()\fR,
+\fIsendmsg()\fR,
+\fIrecvmsg()\fR,
+\fIsendto()\fR,
+\fIrecvfrom()\fR,
+\fIwrite()\fR,
+\fIread()\fR,
+\fIwritev()\fR,
+and \fIreadv()\fR system calls)
+return an octet count
+when the system call completes, and return a short count
+if the system call is interrupted.
+.pp
+The system calls sendmsg and recvmsg
+have been revised to make them more convenient for receipt of
+out of band data.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+cc = sendmsg( s, msg, flags )
+.T&
+l l l.
+ +int+s;
+ +struct msghdr+*msg;
+ +unsigned int+flags;
+.TE
+\fR
+.)b
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+cc = recvmsg( s, msg, flags )
+.T&
+l l l.
+ +int+s;
+ +struct msghdr+*msg;
+ +unsigned int+flags;
+.TE
+\fR
+.)b
+.pp
+The reader should now consult the manual page for recvmsg
+for an explanation of the elements of the msghdr structure,
+and how the calls may be used to glean user-connection-request data.
+.pp
+The \fIflags\fR parameter serves several purposes.
+The TP specification requires that TSDUs be unlimited in size.
+System calls cannot pass unlimited amounts of data between the user
+and the kernel, so
+there cannot be a one-to-one correspondence between TSDUs and
+system calls.
+The \fIflags\fR
+parameter is used to mark the end-of-TSDU on sending data.
+When receiving,
+TP sets this bit
+in the flags element of the msghdr structure
+when the end of a TSDU is consumed.
+This way one TSDU can span several system calls.
+It is possible for the peer to send an empty TPDU with the end-of-TSDU
+flag set, in which case the transport user
+may receive zero octets with the end-of-TSDU flag set.
+.pp
+The \fIflags\fR parameter also serves to distinguish data transfer primitives
+from expedited data transfer primitives.
+The flag bit MSG_OOB is provided for "out of band data" in the
+DoD Internet protocols. It is also used to provide the expedited data service
+of the ISO protocols.
+The transport layer will deliver one expedited datum (there will be a
+one-to-one correspondence between expedited TSDUs and XPD TPDUs)
+at a time.
+The user must receive the datum before the transport
+layer will accept more expedited data.
+Each expedited datum may contain up to 16 octets.
+.pp
+.sh 1 "Disconnection"
+.pp
+The \fIclose\fR system call will disconnect any association
+between two TP entities.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = close( s )
+.T&
+l l l.
+ +int+s;
+.TE
+\fR
+.)b
+.pp
+The argument \fIs\fR is a socket descriptor.
+If a Unix user process terminates, Unix will close all files and
+sockets associated with the process, which means all transport
+connections associated with the process will be disconnected.
+.sh 1 "Indications from the transport layer to the transport user"
+.pp
+The above set of system calls allows you to establish
+a connection, transfer data, and disconnect.
+The
+presence or reception of expedited data is indicated
+by TP setting the MSG_OOB bit in the flags element of the msg structure.
+A disconnection initiated by the peer or by one of the
+cooperating TP entities can be signalled by a control message,
+although we have not yet implemented this.
+.pp
+The Unix signal mechanism may be used to provide these
+service elements, as well.
+When an expedited data TSDU arrives, the TP may interrupt
+the user with a SIGURG signal ("urgent condition present on socket").
+The user must have previously registered a procedure to handle
+the signal by using the \fIsigvec()\fR system call or the
+\fIsignal()\fR library routine provided for that purpose.
+The signal handler takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+int sighandler( signal_number)
+.T&
+l l l.
+ +int+signal_number;
+.TE
+\fR
+.)b
+.pp
+The \fIsignal_number\fR argument will be the well-known constant SIGURG.
+There are two reasons for
+the transport layer to issue
+a SIGURG:
+expedited data
+are present or
+disconnection was initiated by a transport entity or by the peer.
+Should the user have more than one transport connection open,
+another system call is used to determine to which socket(s)
+the urgent condition applies.
+This is the \fIselect()\fR system call, described below.
+.pp
+When the SIGURG indicates a disconnection, there may be
+user data from the peer present.
+TP saves the disconnect data for the user to receive via the
+\fIgetsockopt()\fR system call, or through the
+.IR recvmsg ()
+ancillary data mechanism.
+.\"
+.\"If the user does not receive the disconnect data before the
+.\"reference timer expires, the data will be discarded and the
+.\"socket will be closed.
+.pp
+Transport service users may use more than one transport
+connection at a time.
+The \fIselect()\fR system call facilitates this.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+#include <sys/types.h>
++
+nfound = select( num_to_scan, recvmask, sendmask,
++exceptmask, timeout )
+.T&
+l l l.
+ +int+nfound, num_to_scan;
+ +fd_set+*recvmask, *sendmask, *exceptmask;
+ +struct timeval+*timeout;
+.TE
+\fR
+.)b
+.pp
+This system call takes as parameters a set of masks
+that specify a subset of the socket descriptors that are in
+use by the user program.
+\fISelect()\fR inspects the sockets to see if they have data
+to be received, can service a send without blocking, or
+have an exceptional condition pending, respectively.
+The masks will be set upon return to indicate the socket descriptors
+for which the respective conditions exist.
+The \fInum_to_scan\fR argument limits the number of sockets that are
+inspected.
+The call will return within the amount of time given in the
+\fItimeout\fR parameter, or, if the parameter is zero, \fIselect()\fR
+will block indefinitely.
+.\" FIGURE
+.so ../wisc/figs/TS_primitives.nr
+.pp
+.CF
+summarizes the mapping of the transport service primitives
+to Unix facilities.
diff --git a/share/doc/iso/wisc/Makefile b/share/doc/iso/wisc/Makefile
new file mode 100644
index 0000000..7d9aa87
--- /dev/null
+++ b/share/doc/iso/wisc/Makefile
@@ -0,0 +1,73 @@
+#
+# Makefile for the tp documents:
+# design: TP design/source guide
+# appendix_a: index of tp kernel routines & macros by macro/routine name
+# appendix_b: index of tp kernel routines & macros by file name
+#
+PRINTER = 3a
+TAGS = ../../sys/tags
+SRCS = ../../sys/netargo/tp_*.c ../../sys/netargo/tp_*.h ../../sys/netargo/tp*.trans
+TROFF = /usr/local/lib/troff
+
+#
+# Print via speedy for cycles sake...
+# (assumes postscript printer...)
+#
+program:
+ @echo printer is $(PRINTER)
+ (cd figs; make)
+ format -P$(PRINTER) -t program.nr | rsh speedy psdit \| lpr -P$(PRINTER)
+
+parts:
+ @echo printer is $(PRINTER)
+ (cd figs; make)
+ format -P$(PRINTER) -t parts.nr | rsh speedy psdit \| lpr -P$(PRINTER)
+# format -P$(PRINTER) -t parts.nr > /dev/null
+# soelim parts.nr | grn -P$(PRINTER) |\
+# $(TROFF) -Tpsc | rsh speedy psdit \> /tmp/test
+# soelim parts.nr | tbl > /tmp/parts.nr
+
+clean:
+ /bin/rm -f core junk* a.out *.o spell_errs made
+ touch spell_errs
+
+spell:
+ (cd figs; make)
+ (cd ../icon; make)
+ /usr/ucb/soelim program.nr | /usr/bin/spell -d hlista > spell_errs
+
+newdict:
+ cat spell_errs | spellin /usr/dict/hlista > hlista
+
+all: program appendix_a appendix_b appendix_c
+
+
+appendix_c:
+ format -P$(PRINTER) appendix_c.nr
+ tbl ../man/man4/table1.src > ../man/man4/table1.nr
+ tbl ../man/man4/table2.src > ../man/man4/table2.nr
+ tbl ../man/man4/table3.src > ../man/man4/table3.nr
+ soelim ../man/man4/tp.4p.src > ../man/man4/tp.4p
+ ditroff -man -P$(PRINTER) ../man/man1/xebec.1
+ ditroff -man -P$(PRINTER) ../man/man2/sendv.2
+ ditroff -man -P$(PRINTER) ../man/man2/recvv.2
+ ditroff -man -P$(PRINTER) ../man/man3/libtp.3
+ ditroff -man -P$(PRINTER) ../man/man4/tp.4p
+ ditroff -man -P$(PRINTER) ../man/man8/tppt.8
+ ditroff -man -P$(PRINTER) ../man/man8/tpdebug.8
+ ditroff -man -P$(PRINTER) ../man/man8/tpstat.8
+
+appendix_a:
+ ctags -x $(SRCS) | awk '{printf("%s %s %s\n", $$1, $$3, $$2)}'\
+ | sed -e 's-../../sys/netargo/--' > index_by_func.nr
+ format -P$(PRINTER) appendix_a.nr
+
+appendix_b:
+ ctags -x $(SRCS) | awk '{printf("%s %s %s\n", $$3, $$1, $$2)}'\
+ | sed -e 's-../../sys/netargo/--' \
+ | sort \
+ | fmtxref -w 80 \
+ | sed -e 's/ / /' \
+ -e 's/ / /' \
+ > index_by_file.nr
+ format -P$(PRINTER) appendix_b.nr
diff --git a/share/doc/iso/wisc/Outline b/share/doc/iso/wisc/Outline
new file mode 100644
index 0000000..30ea7b8
--- /dev/null
+++ b/share/doc/iso/wisc/Outline
@@ -0,0 +1,18 @@
+Ch 1 Intro
+Ch 2 Definitions
+Ch 3 Transport Service Interface
+Ch 4 Network Service Interface
+ - user interface
+ - kernel interface
+Ch 5 Addressing
+Ch 6 Design of Transport
+Ch 7 Design of Network
+Ch 8 Error Handling
+ - transport
+ - network
+Ch 9 Guide to Transport Source Code
+Ch 10 Guide to Network Source Code
+Ch 11 Guide to Common Source Code
+Ch 12 Testing & Debugging
+ - transport
+ - network
diff --git a/share/doc/iso/wisc/TODO b/share/doc/iso/wisc/TODO
new file mode 100644
index 0000000..8e7bf64
--- /dev/null
+++ b/share/doc/iso/wisc/TODO
@@ -0,0 +1,2 @@
+update clnp echo doc
+add esis
diff --git a/share/doc/iso/wisc/addr.nr b/share/doc/iso/wisc/addr.nr
new file mode 100644
index 0000000..1133422
--- /dev/null
+++ b/share/doc/iso/wisc/addr.nr
@@ -0,0 +1,155 @@
+.NC "NSAP Addresses & Routing"
+.sh 1 "OSI Address Formats"
+.pp
+ARGO supports an ISO address family, AF_ISO, in addition to the
+DoD Internet address family, AF_INET.
+Addresses in the family AF_ISO
+take the form described by ISO 8348/DAD2, which is an addendum to the
+OSI network service standard that describes network layer addressing.
+.sh 2 "ISO 8348/DAD2 Tutorial"
+.pp
+.\" FIGURE
+.\".so figs/osi_addr.nr
+.so figs/addrfmt.nr
+.CF
+shows the
+format of an OSI NSAP-address.
+The address has two major parts: the initial domain part
+(IDP) and the domain specific part (DSP). The IDP is further divided into
+two parts: the authority and format identifier (AFI) and the
+initial domain identifier (IDI). The AFI specifies the format of the
+IDI, the authority responsible for allocating values of the IDI, and
+the syntax of the DSP. The IDI specifies the network addressing domain
+from which DSP values are allocated, and the authority responsible for
+allocating DSP values.
+.sh 2 "Supported Formats"
+.pp
+ARGO supports three types of ISO NSAP-addresses:
+one type with AFI 37(hex) and two types with AFI 47(hex).
+.sh 3 "AFI 37"
+.pp
+This value of the AFI defines the IDI to be an X.121 address or DTE
+address.
+The DTE address is encoded in binary coded decimal.
+The DSP syntax is binary.
+This form is intended to be used when communicating
+across a public data network (PDN).
+The ARGO software and documentation
+refer to this type of NSAP-address as a
+\*(lqtype 37\*(rq
+address.
+.sh 3 "AFI 47"
+.pp
+The value of 47 for the AFI defines the IDI to be a 4 digit International
+Code Designator (ICD) allocated according to ISO 6523.
+ARGO supports two
+ICD values.
+.sh 4 "ICD 0004"
+.pp
+The ICD value of 0004 is assigned to OSINET,
+an experimental OSI network overseen by
+the National Institute of Standards and Technology.\**
+.(f
+\** formerly the National Bureau of Standards
+.)f
+When this style of NSAP-address
+is used,
+the DSP is divided into four parts: an organization identifier (2 bytes),
+a subnet identifier (2 bytes),
+an SNPA-part (4-8 bytes), and
+an NSAP selector (1 byte).
+The use of these fields is defined by the OSINET steering committee.
+This type of address is known as an
+\*(lqOSINET\*(rq
+address.
+.sh 4 "ICD 0006"
+.pp
+The ICD value of 0006 is assigned to the Department of Defense (DoD).
+In ARGO, NSAP-addresses with an ICD value of 0006
+are of the format defined in RFC 986, a proposal for embedding DARPA Internet
+addresses within an OSI NSAP-address.
+In this case, the DSP takes the form:
+version (1 byte), DARPA Internet Address (4 bytes), upper layer protocol
+identifier (1 byte).
+This is called an
+\*(lqrfc986\*(rq
+address.
+.sh 1 "Internal Representation"
+.pp
+Internally, an NSAP address takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s s.
+struct iso_addr {
+.T&
+l l l l l.
++u_char+isoa_afi;+/* authority &
++++format id */
++union+{+
+++struct addr_37+addr_37;+/* x.121 */
+++struct addr_osinet+addr_osinet;+/* OSINET */
+++struct addr_rfc986+addr_rfc986;+/* Internet*/
++}+isoa_u;
++u_char+isoa_len;+/* length */
+}
+.TE
+\fR
+.)b
+The field \fIisoa_afi\fR contains the AFI for the address.
+The union
+\fIisoa_u\fR contains the remainder of the address.
+The length of the entire address (the AFI, IDI, and DSP) is
+stored in \fIisoa_len\fR.
+.sh 1 "Network Layer Routing"
+.pp
+Routing at the network layer is performed by the
+routing procedure \fIrtalloc()\fR as described in Chapter 5.
+\fIRtalloc()\fR was designed for use in the DoD Internet
+domain.
+An unfortunate
+effect of this is that routing decisions are based upon either the
+entire NSAP address or the network portion of the address.
+The problem is defining the network portion of an NSAP address.
+The location and extent of the
+network portion of an NSAP address depends on the
+style of NSAP address.
+This decision is made by the function \fIiso_netof()\fR.
+.sh 2 "Network Portion of Type 37 Addresses"
+.pp
+There is no network portion of an X.121 address.
+In ARGO, the network portion of a type 37 address
+is defined to be just the AFI.
+The obvious consequence of this is that all type 37 addresses will
+match all other type 37 addresses
+in a network-portion comparison.
+.sh 2 "Network Portion of OSINET Addresses"
+.pp
+The network portion of an OSINET address is the organization identifier and
+the subnet identifier.
+.sh 2 "Network Portion of RFC986 Addresses"
+.pp
+The network portion of an RFC986 address is the network portion of the
+embedded DARPA Internet address.
+ARGO does not support subnetting, a method of subdividing Internet addresses.
+.sh 1 "NSAP Address / Subnetwork Point of Attachment Mapping"
+.pp
+In order to transmit a packet on a real subnetwork, the destination
+NSAP address
+must be mapped to an SNPA address.
+An SNPA address is the real, "hardware" address
+of a system on a network.
+This address corresponds to the 6 byte Ethernet or Token Ring
+Adapter address,
+or to the DTE address, which may be up to 7 bytes
+long (14 decimal digits).
+.pp
+A table, \fIsnpa_cache\fR, is kept in the kernel which contains the
+translation between NSAP-addresses and SNPA-addresses.
+This table is used by \fIiso_snparesolve()\fR whenever a
+datagram must be dispatched.
+The table is maintained by the ISO ES-IS protocol entity.
+Entries can be added and deleted
+by the user program \fIclnlutil(8)\fR and
+by the CONS entity.
diff --git a/share/doc/iso/wisc/appendix_a.nr b/share/doc/iso/wisc/appendix_a.nr
new file mode 100644
index 0000000..2522854
--- /dev/null
+++ b/share/doc/iso/wisc/appendix_a.nr
@@ -0,0 +1,51 @@
+.\" $Header: appendix_a.nr,v 1.3 88/12/07 10:42:12 nhall Exp $
+.(x
+Appendix A
+.)x
+.bp
+.sz +2
+.ce 3
+Appendix A
+
+\fBStandards Documents\fR
+.sz -2
+.lp
+The following appendix lists many of the standards documents which were
+consulted during development.
+.ip "\fBNetwork Layer\fR"
+.ip "ISO 8348" 15
+Network Service Definition
+.ip "ISO 8348/AD1" 15
+Connectionless-mode Transmission
+.ip "ISO 8348/AD2" 15
+Network Layer Addressing
+.ip "ISO 8648" 15
+Internal Organization of the Network layer
+.ip "ISO 8473" 15
+Protocol for Providing the Connectionless Network Service
+.ip "ISO 8473/DAD1" 15
+Provision of the Underlying Service Assumed by ISO 8473
+.ip "ISO 8473/DAD2" 15
+Formal Description of ISO 8473
+.ip "ISO 9542" 15
+End System to Intermediate System Routing Exchange Protocol
+for use in conjunction with the Protocol providing the Connectionless-mode
+Network Service
+.ip "ISO 8208" 15
+X.25 packet level protocol for data terminal equipment
+.ip "ISO 8878" 15
+Use of X.25 to Provide the Connection-mode Network Service
+.ip "\fBTransport Layer\fR"
+.ip "ISO 8072" 15
+Transport Service
+.ip "ISO 8073" 15
+Transport Protocol
+.ip "ISO 8073/PDAD2" 15
+Class 4 over Connectionless Network
+.ip "\fBFunctional Standards Profiles\fR"
+.ip "OSINET" 15
+Implementation Agreements Among Participants of OSINET, December 1987
+.ip "NBS" 15
+Implementors' Agreements
+.ip "GOSIP" 15
+Government OSI Profile
diff --git a/share/doc/iso/wisc/appendix_b.nr b/share/doc/iso/wisc/appendix_b.nr
new file mode 100644
index 0000000..01ade55
--- /dev/null
+++ b/share/doc/iso/wisc/appendix_b.nr
@@ -0,0 +1,10 @@
+.\" $Header: appendix_b.nr,v 1.1 88/12/05 18:08:02 nhall Exp $
+.(x
+Appendix B
+.)x
+.bp
+.sz +2
+.ce 3
+Appendix B
+
+\fBManual Pages\fR
diff --git a/share/doc/iso/wisc/debug.nr b/share/doc/iso/wisc/debug.nr
new file mode 100644
index 0000000..352eeee
--- /dev/null
+++ b/share/doc/iso/wisc/debug.nr
@@ -0,0 +1,1043 @@
+.\"$Header: debug.nr,v 1.4 88/12/06 16:05:36 nhall Exp $
+.\"$Source: /usr/argo/doc/kernel/RCS/debug.nr,v $
+.\"
+.\" Program names should be in italics
+.\"
+.NC "Debugging, Testing, and Statistics"
+.sh 1 "Introduction"
+.pp
+This section describes the methods used
+to test and debug the ARGO kernel.
+Three facilities are used in combination:
+debug options,
+simple statistics gathering,
+and
+protocol event tracing.
+Many of the debug options
+simply cause information to be printed on the console, but
+several of these options cause
+TP to behave pathologically
+so that errors are introduced in desirable ways.
+.pp
+TP and CLNP keep simple statistics.
+These statistics include such things as the total
+number of PDUs that are sent and received,
+a count of various types of errors
+that can be encountered when parsing an incoming PDU,
+and the average and standard deviation of round trip times for
+transport PDUs.
+These statistics are useful for debugging.
+For example,
+if an incoming CC TPDU is rejected because one of the optional
+parameters is faulty, this is noted in the statistics.
+The statistics are kept on a system-wide basis rather than
+on a per-connection basis.
+They can be printed or cleared by user-level utility programs.
+.pp
+The tracing facility allows selective tracing of events.
+Events are grouped into categories relating to different
+functions of TP.
+For example, it is possible to
+trace only the events that pertain to acknowledgments.
+.pp
+At run time the debugging and tracing options can
+be set and cleared by privileged utility programs.
+Each of these facilities is described in more
+detail below.
+.sh 1 "Debugging"
+.pp
+Most of the debugging options
+print messages on the console.
+Kernel printing is done by busy-waiting at high priority,
+so debugging options should be used very sparingly.
+A sample of the code is:
+.(b
+.nf
+\fC
+IFDEBUG(D_TPINPUT)
+ printf("tp_input m 0x%x tpdu_len 0x%x\n", m, tpdu_len);
+ENDDEBUG
+\fR
+.fi
+.)b
+.sp 1
+.lp
+IFDEBUG and ENDDEBUG are macros that are defined in one of two ways.
+If the system is configured with the ARGO_DEBUG
+option, an array
+\fCargo_debug[128]\fR
+is declared, and
+IFDEBUG and ENDDEBUG are defined thus:
+.(b
+.nf
+\fC
+#define IFDEBUG(option) if(argo_debug[option]) {
+#define ENDDEBUG ; }
+\fR
+.fi
+.)b
+.lp
+If the system is configured without the ARGO_DEBUG option, these
+two macros resolve to the C comment delimiters, \fC/*\fR and \fC*/\fR,
+causing all the debugging code lying between the macros
+to be elided.
+.pp
+TP, CLNP, and CONS debugging can be enabled independently.
+All debugging requires that the code be compiled with the
+option ARGO_DEBUG.
+The \fIconfig(8)\fR option CLNP_DEBUG will include debugging printfs for CLNP.
+TP_DEBUG has the same effect for TP.
+.pp
+The array elements of \fCargo_debug[]\fR are set by
+the utility program
+\fIbark\fR,
+which reads and writes
+\fC/dev/kmem\fIN\fR.
+See the manual page \fIbark(8)\fR.
+.pp
+Several debugging options cause TP to behave pathologically,
+for the purpose of reproducing difficult-to-reproduce
+error conditions that the protocol must correct.
+For example, the
+\fID_DROP\fR, or \fIbark -on T.drop\fR
+option causes
+\fItp_input()\fR
+to discard TPDUs on a pseudo-random basis.
+These will be described below.
+.sh 1 "Statistics"
+.pp
+.sh 2 "CLNP Statistics"
+.pp
+CLNP keeps a set of statistics related to its operation.
+Statistics include such things as NPDUs sent, received, and dropped.
+These statistics are stored in the global structure \fIclnp_stat\fR.
+The utility program \fInetstat(8)\fR may be used to print these statistics.
+.sh 2 "TP Statistics"
+.pp
+TP keeps a set of running counts of certain events.
+The statistics include such things as the numbers
+of each type of TPDU sent and received, TPDUs dropped,
+and the numbers of occurrences of certain types of errors.
+The statistics are stored in the global structure \fItp_stat\fR.
+The utility programs
+\fItpstat\fR and
+\fItpmon\fR
+read \fC/dev/kmem\fIN\fR
+and print the contents of the statistics structure
+in a human-readable form.
+\fITpstat\fR prints the statistics on any ascii screen or printer.
+\fITpmon\fR uses the \fIcurses\fR library and assumes that it has
+a screen or window of size 53(long) X 80(wide), and it updates the
+screen every 30 seconds.
+.pp
+\fITpstat\fR and \fItpmon\fR can be used to clear the statistics (set them
+all to zero); the \fB-c\fR option causes the statistics to be cleared.
+.pp
+Statistics are observed using \fItpstat(8)\fR
+to clear statistics before a test, and to print
+the statistics after the test.
+See the manual pages \fItpstat(8)\fR and \fItpmon(8)\fR.
+.sh 1 "Tracing"
+.pp
+.sh 2 "CLNP Tracing"
+.pp
+CLNP does not support event tracing.
+.sh 2 "TP Tracing"
+.pp
+The tracing facility consists of a circular buffer (an array)
+of structures that are written by the kernel at various times,
+and a utility program that reads \fC/dev/kmem\fIN\fR
+to interpret the contents of the buffer.
+The trace structure is a union of the structures that
+will be interpreted by the utility program.
+A trace event consists of a call to the trace routine \fItpTrace\fR
+with a set of arguments containing the information relevant to the
+event being traced.
+The procedure tpTrace is actually called through a macro \fItptrace\fR.
+For example,
+.(b
+.nf
+\fC
+IFTRACE(D_INPUT)
+ tptrace(TPPTtpduin, h->tpdu_type, h, h->tpdu_li+1, 0, 0);
+ENDTRACE
+\fR
+.fi
+.)b
+.pp
+The tracing macros are defined in the same manner as the
+debugging macros:
+.(b
+.nf
+\fC
+#define IFTRACE(option) if(tp_traceflags[option]) {
+#define ENDTRACE }
+\fR
+.fi
+.)b
+.lp
+If the kernel is configured with the option TPPT, these macros
+are defined as shown above, but if the TPPT option is not
+used, these macros become C-style comment delimiters.
+.pp
+The tracing procedure copies \fIh->tpdu_li + 1\fR bytes beginning at
+location \fIh\fR into a trace structure in the circular buffer.
+The utility program \fItppt\fR
+reads the trace structure,
+interprets the data as a TPDU header,
+and prints the header in hexadecimal form, with a banner identifying
+the event as an incoming TPDU:
+.(b
+.nf
+\fC
+1a: Ref 22 arg 14(0xe), @ 91990 : 0000.453125 tpdu
+INPUT total len 22
+HDRLEN: 21+1 CR_TPDU_type cdt 0(0x0) dref 0x0
+ + 0: 0x15 0xe0 0x00 0x00 4: 0x00 0x03 0x00 0xc1
+ + 8: 0x06 0x74 0x70 0x70 12: 0x69 0x6e 0x67 0xc2
+ +16: 0x02 0x00 0x07 0xc0 20: 0x01 0x08 0x00 0x00
+
+\fR
+.fi
+.)b
+.pp
+In addition to the data copied from the arguments to tpTrace(),
+each trace structure contains
+a time stamp and an event sequence number, and in many cases, the
+connection reference to which the traced event applies.
+The utility program \fItppt\fR is used to turn on and off the
+tracing options.
+.pp
+This facility can be used for debugging the source
+code as well as for studying the behavior of the protocol.
+For example, by adding the appropriate trace events,
+it is possible to "see" the resequencing function of TP
+working when a debug option is used to cause
+TPDUs to be dropped occasionally.
+.pp
+See the manual page \fItppt(8)\fR.
+.sh 1 "Testing"
+.pp
+.sh 2 "CLNP Testing"
+.pp
+CLNP was tested in two rather different ways.
+The first method of testing used the
+raw CLNP interface with the program \fIclnptest\fR.
+\fIclnptest\fR allows a user to send or receive CLNP NSDUs.
+With \fIclnptest\fR, a user can send CLNP NSDUs with various
+combinations of options and observe the result.
+.pp
+The second method of testing CLNP was to have TP use CLNP as its network
+layer protocol.
+This method provides a good stress test for CLNP.
+Unfortunately, TP generally calls CLNP in the same manner, so that not all
+of the CLNP options are exercised.
+.sh 3 "Clnptest"
+.pp
+The program \fIclnptest\fR can be invoked as either
+a reader or as a writer:
+.(b
+\fC
+clnptest <options>
+\fR
+.)b
+The \fI-r\fR option invokes \fIclnptest\fR as a reader, the
+\fI-w\fR option invokes it as a writer.
+Other options allow the user to indicate the destination, number of NSDUs,
+size of NSDUs,
+and NSDU options.
+See \fIclnptest(8)\fR for more information.
+.pp
+\fIclnptest\fR is normally used in the following manner.
+On one machine, invoke \fIclnptest\fR as a reader:
+.(b
+\fC
+clnptest -r
+\fR
+.)b
+On a different machine, transmit an NSDU.
+For example, to test the source route function, one invokes:
+.(b
+\fC
+clnptest -w -h a -oR "b, c, d"
+\fR
+.)b
+This sends an NSDU to host 'a', source routing it via
+hosts 'b', 'c', and 'd'.
+.sh 3 "The Troll"
+In order to test CLNP reassembly certain errors must be generated.
+The mechanism used has two parts,
+the user program \fIclnptroll\fR, which enables and disables
+the generation of these errors, and the
+kernel resident error-creation routines.
+.pp
+Troll options allow one to duplicate an NSDU with a specified frequency.
+The kernel must be compiled with the \fIconfig\fR option \fITROLL\fR
+in order to include troll code.
+See \fIclnptroll(8)\fR for more information.
+.sh 3 "Debugging CLNP"
+.pp
+The following sections describe the \fIbark\fR options
+appropriate for testing parts of CLNP.
+Refer to \fIbark(8)\fR for more information about debugging
+using \fIbark\fR.
+.sh 4 "Sending NSDUs"
+.pp
+Turning on the \fIbark\fR
+option \fIC.output\fR causes information to be
+printed whenever an NSDU is transmitted.
+Translation of NSAP addresses to SNPA can be monitored by turning on
+the \fIC.un\fR, or \fIC.lan\fR options.
+Parts of outgoing NSDUs can be dumped when the \fIC.dumpout\fR
+option is on.
+Routing activity can be watched by turning on \fIC.route\fR and \fIC.iso\fR.
+Information about CLNP option processing is available with \fIC.options\fR.
+.sh 4 "Forwarding NSDUs"
+.pp
+The \fIforward\fR switch will cause debugging information to be displayed
+whenever NSDUs are forwarded.
+.sh 4 "Receiving NSDUs"
+.pp
+Information is displayed about incoming NSDUs when the \fIC.input\fR
+option is enabled.
+A portion of incoming NSDUs can be dumped by turning on the
+\fIC.dumpin\fR option.
+.sh 4 "Fragmentation and Reassembly"
+.pp
+The options \fIC.frag\fR and \fIC.reass\fR turn on debugging for the
+CLNP fragmentation and reassembly functions.
+.sh 2 "TP Testing"
+.pp
+Five services were used for most of the testing:
+the \fIdiscard\fR service,
+the \fIecho\fR service,
+the \fIremote login\fR service,
+the \fIremote shell\fR service,
+and
+the \fIsimple file transfer\fR service.\**
+.(f
+\** In fact, ancestors of these services were used for testing the
+ARGO transport implementation during development.
+These programs in their original forms were very cumbersome to use;
+consequently they evolved to become the services described here.
+.)f
+Each service consists of a daemon process or server that listens
+on a well-known transport selector (which is listed in the
+ARGO directory service), and an active process that contacts the
+server.
+Four of these services,
+discard, echo, remote login, and remote shell,
+are supported by the
+\fIisod\fR suite of daemons, which is a
+version of the \fIinetd\fR programs that uses
+the ISO protocol suite.
+.sh 3 "The Discard Service"
+The discard server listens on the transport selector
+registered in the ARGO directory service for the application
+"discard".
+The server accepts incoming connection requests,
+receives TSDUs, and throws away the TSDUs.
+It never initiates a disconnect, but expects its peer
+to disconnect the transport connection.
+.pp
+The program \fItpdiscard\fR connects to the
+discard server.
+The transport service and protocol options it uses are those
+indicated in the ARGO directory service.
+By changing the directory service entry for the
+discard service, each of the transport service options and
+protocol options can be demonstrated.
+See the manual pages
+\fItpdiscard(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 3 "The Echo Service"
+The echo server listens on the transport selector
+registered in the ARGO directory service for the application
+"echo".
+The server accepts incoming connection requests,
+receives TSDUs, and returns the TSDUs to the sender.
+It never initiates a disconnect, but expects its peer
+to disconnect the transport connection.
+.pp
+The
+program \fItpping\fR connects to the
+echo server.
+The transport service and protocol options it uses are those
+indicated in the ARGO directory service.
+By changing the directory service entry for the
+echo service, each of the transport service options and
+protocol options can be demonstrated.
+See the manual pages
+\fItpping(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 3 "The Remote Login Service"
+The remote login server listens on the transport selector
+registered in the ARGO directory service for the application
+"login".
+The server accepts incoming connection requests,
+implements the BSD remote login protocol, checks permissions using
+the \fC~/.rhosts\fR and \fC/etc/passwd\fR files, and
+uses the ARGO directory service to discover name-to-NSAP-address
+mappings.
+If the remote user is authorized to log in to the end system on which
+the server runs, a login is started.
+.pp
+The program \fIrlogin.iso\fR connects to the remote login server.
+The transport service and protocol options it uses are those
+indicated in the ARGO directory service.
+By changing the directory service entry for the
+login service, each of the transport service options and
+protocol options can be demonstrated.
+See the manual pages
+\fIrlogin.iso(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 3 "The Remote Shell Service"
+The remote shell server listens on the transport selector
+registered in the ARGO directory service for the application
+"shell".
+The server accepts incoming connection requests,
+implements the BSD remote command authorization protocol,
+checks permissions using
+the \fC~/.rhosts\fR and \fC/etc/passwd\fR files, and
+uses the ARGO directory service to discover name-to-NSAP-address
+mappings.
+If the remote user is authorized to execute a shell on
+the end system on which
+the server runs, a shell is started.
+.pp
+The program \fIrcp.iso\fR connects to the remote shell server to
+effect a remote copy.
+The transport service and protocol options it uses are those
+indicated in the ARGO directory service.
+By changing the directory service entry for the
+shell service, each of the transport service options and
+protocol options can be demonstrated.
+See the manual pages
+\fIrcp.iso(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 3 "The Simple File Transfer Service"
+.pp
+The last service consists of a pair of programs,
+\fItpfileget\fR and
+\fItpfileput\fR,
+which cooperate to transfer one file.
+The passive program, \fItpfileget\fR,
+listens on the transport selector registered in the ARGO directory service
+to support the application named "tptestsel".
+The sending program, \fItpfileput\fR,
+connects to the passive program, transfers in one TSDU
+the file named on the \fItpfileput\fR command line, and waits for the
+passive end to close the connection.
+\fITpfileget\fR
+opens a file of the name given on its command line,
+accepts one connection request, receives
+one TSDU, writes the contents of that TSDU to the opened file,
+and when it receives the end-of-TSDU indication,
+\fItpfileget\fR closes the transport connection.
+The transport service options and protocol options used by
+\fItpfileput\fR are determined by the ARGO directory service
+record that describes the application "tptestsel".
+See the manual pages
+\fItpfileget(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 3 "Internal TP Testing"
+.pp
+The methods used to test each of the various functions
+of TP are described in this section.
+One or more of the services described above were used, while
+the TP activity was observed with tracing or debugging or both.
+The statistics were cleared before each test and inspected
+after each test.
+Each test can be run with different protocol and service options,
+by changing the transport parameters in records
+in the ARGO directory service file.
+See the manual pages
+\fItpstat(8)\fR,
+\fItpmon(8)\fR,
+\fItppt(8)\fR,
+\fIbark(8)\fR,
+\fItp(4p)\fR,
+and
+\fIisodir(5)\fR
+for more information.
+.sh 4 "Normal and Expedited Data Transfer:"
+.pp
+TSDUs are
+distinguished by the presence or absence of the
+EOTSDU bit in the \fIflags\fR parameter of the
+\fIsendv()\fR system call.
+The data of a TSDU are copied into chains of \fImbufs\fR
+in the kernel so that the end of a TSDU lies in an mbuf
+with the \fIm_act\fR field non-zero.
+The end of a TSDU never lies in the middle of an
+mbuf.
+This is true on the receiving side as well.
+On output, the segmenting function,
+the function that copies user data into mbuf chains and
+reorganizes mbuf chains into TPDUs,
+is observed using several debug options
+and trace options
+in the routines \fIsosend()\fR
+and \fItp_sbsend()\fR.
+On input, the reassembling mechanism
+is observed in the routine \fItp_stash()\fR.
+The debug options
+\fBT.ndata\fR,
+\fBT.sb\fR, and
+\fBT.xpd\fR
+print information
+pertinent to this function.
+.pp
+Expedited data complicates the matter of segmenting
+because markers must be kept in the chains of outgoing
+TPDUs to indicate the precedence of expedited data TPDUs
+over normal data TPDUs.
+The pertinent trace options are \fBT.sb\fR and \fBT.ndata\fR.
+With the trace and (or) debugging options on,
+and with \fItpdiscard\fR running, one can observe the segmentation
+and reassembly of TPDUs.
+.pp
+Using the file transfer programs to transfer a file,
+then transferring it back with \fIrcp\fR (the TCP version) if necessary, and
+using
+\fIdiff\fR, one can see that data are transferred correctly.
+The \fBT.input\fR trace option creates a readable hexadecimal dump of incoming TPDUs.
+The
+\fBT.emit\fR
+trace option creates the same sort of dump for outgoing
+TPDUs in \fItp_emit()\fR.
+Sequencing
+can be observed by using the
+\fBT.ndata\fR
+and
+\fBT.input\fR
+or
+\fBT.emit\fR
+trace options
+to see the sequence numbers assigned to TPDUs.
+.pp
+The
+\fBT.drop\fR
+debug option causes \fItp_input()\fR
+to throw away occasional TPDUs.
+(The formula for determining when to discard a TPDU
+is ad hoc and simplistic. It causes TPDUs to be
+discarded frequently but not so frequently that the
+receiving side has no chance to recover.)
+With tracing on and the file transfer programs running,
+resequencing can be observed
+and the correctness of the transferred data
+can be verified with \fIdiff(1)\fR.
+.pp
+The use of both normal and extended formats
+can be observed with the \fBT.input\fR and \fBT.emit\fR trace options.
+.pp
+The following statistics are of interest:
+.(b
+.nf
+\fIn\fR connections used extended format
+\fIn\fR connections allowed transport expedited data
+\fIn\fR connections turned off checksumming
+\fIn\fR connections dropped due to retrans limit
+\fIn\fR EOT bits on incoming TPDUs
+\fIn\fR EOT bits on outgoing TPDUs
+\fIn\fR XPD marks discarded
+\fIn\fR XPD stopped data flow \fIm\fR times
+\fIn\fR DTs out of order
+\fIn\fR DTs not in window
+\fIn\fR duplicate DTs
+\fIn\fR XPDs not in window
+\fIn\fR XPDs w/o credit to stash
+\fIn\fR DT (sent)
+\fIn\fR DT (received)
+\fIn\fR DT (retransmitted)
+\fIn\fR XPD (sent)
+\fIn\fR XPD (received)
+\fIn\fR random DTs dropped
+.fi
+.)b
+.sh 4 "Checksumming, use and non-use:"
+.pp
+The checksum generation and checking
+routines were first written and debugged as user-level
+routines before they were modified for kernel use.
+The kernel routines may be observed with the
+\fBT.chksum\fR
+debug option.
+Various sizes of mbufs can be created by creative use of the
+ARGO directory service, particularly by changing the value of the
+attribute \fItp.tpdusize\fR.
+There is no trace option for checksumming.
+Checksumming has been used with transfers to and from at least
+one other TP implementation.
+.pp
+The statistics that are pertinent to checksumming are:
+.(b
+.nf
+\fIn\fR connections turned off checksumming
+\fIn\fR invalid checksums
+.fi
+.)b
+.sh 4 "Acknowledgment:"
+.pp
+Acknowledgment can be observed by using the
+debug and trace options
+\fBT.aks\fR,
+\fBT.akr\fR,
+\fBT.input\fR,
+\fBT.emit\fR,
+and
+\fBT.driver\fR.
+The transport driver (finite state machine) and the routine
+\fItp_goodack()\fR dump information appropriate to acknowledgments.
+If the \fBT.ndata\fR, and \fBT.emit\fR or \fBT.input\fR trace options are used
+along with the \fBT.aks\fR and \fBT.akr\fR trace options,
+a complete picture of the data transfer and acknowledgment
+activity can be created.
+The acknowledgments for expedited data are traced with
+the
+\fBT.xpd\fR
+trace option.
+The routine \fItp_goodXack()\fR and the finite state
+machine dump information when the
+\fBT.xpd\fR
+debug and trace options are used.
+To cause expedited data to be generated,
+the -e or -E option on the discard programs or the file
+transfer programs is used.
+To observe the different acknowledgment strategies,
+the protocol options were changed in the ARGO directory service.
+.pp
+The pertinent statistics are:
+.(b
+.nf
+\fIn\fR AK (received)
+\fIn\fR AK (sent)
+ACK reasons:
+\fIn\fR not acked immediately
+\fIn\fR strategy==each
+\fIn\fR strategy==fullwindow
+\fIn\fR duplicate DT
+\fIn\fR EOTSDU
+\fIn\fR reordered
+\fIn\fR user rcvd
+\fIn\fR fcc reqd
+.fi
+.)b
+.pp
+The smoothed average round trip time is kept
+for outgoing TPDUs for each transport connection
+and for the entire TP entity.
+The time each TPDU is transmitted is recorded, and when an acknowledgment
+arrives, the round trip time is computed for the lowest
+sequence number that this AK TPDU acknowledges.
+The computation of round trip times can be observed
+in a trace with the
+\fBT.rtt\fR
+option.
+.pp
+In addition to average round trip times, the kernel
+maintains the standard deviation of the round trip times.
+This statistic is kept for each connection and for the entire
+TP entity.
+In fact, four such sets of statistics are kept for the TP entity:
+.np
+for traffic not on a public data network (PDN) and on the same network as this end system,
+.np
+for traffic not on a public data network (PDN) and not on the same network as this end system,
+.np
+for traffic on a public data network (PDN) and on the same network as this end system,
+and
+.np
+for traffic on a public data network (PDN) and not on the same network as this end system.
+The determination of whether traffic is on the same network as this end system
+is based on the "network portion" of the peer's NSAP-address.
+For more information about this, see the section of this document titled
+\fB"Network Layer Routing"\fR.
+.pp
+The smoothed average round trip time statistics for a given connection
+can be observed with the -t option to
+\fItpstat(8)\fR.
+The global round trip statistics can be observed with the -s option to
+\fItpmon(8)\fR.
+.sh 4 "Flow Control:"
+.pp
+Flow control activity is the transfer of credit information
+from end to end and the proper use of that information.
+To see that it works properly, one must observe three
+things:
+the receiving window must shut down and reopen,
+the sender must transmit enough TPDUs to fill the receiver's window
+but no more, and the receiver must renege previously advertised credit.
+These three behaviors have been observed as follows.
+.pp
+Tracing with the
+\fBT.ndata\fR,
+\fBT.aks, \fR
+\fBT.akr, \fR
+\fBT.emit\fR
+and
+\fBT.input\fR
+trace options
+is used.
+The program \fItpdiscard\fR or a simple file transfer
+is run with various window
+and maximum TPDU sizes, various acknowledgment strategies, and
+various retransmission strategies,
+and the activity is observed with the trace.
+The debug option
+\fBT.reneg\fR
+must be used to fake a reneging of credit, since
+the ARGO transport entity does not renege its advertised credit
+under normal operation.
+At the beginning of a connection a closed window may almost always
+be observed.
+The receiving user process may be stopped
+to force a window to shut down.
+The interesting statistics are
+.(b
+.nf
+\fIn\fR times local cdt reneged (receiving)
+\fIn\fR foreign window closed (sending)
+\fIn\fR faked reneging of cdt
+\fIn\fR DT TPDUs (sent)
+\fIn\fR DT TPDUs (received)
+\fIn\fR AK TPDUs (sent)
+\fIn\fR AK TPDUs (received)
+ACK reasons:
+\fIn\fR not acked immediately
+\fIn\fR strategy==each
+\fIn\fR strategy==fullwindow
+\fIn\fR duplicate DT
+\fIn\fR EOTSDU
+\fIn\fR reordered
+\fIn\fR user rcvd
+\fIn\fR fcc reqd
+.fi
+.)b
+.sh 4 "Retransmission and retention until acknowledgment:"
+.pp
+To observe that the sender retains TPDUs until they are
+acknowledged, one needs only to use the
+\fBT.drop\fR
+debug option to cause TPDUs to be dropped by the receiving side.
+They are then retransmitted by the sender
+and finally dropped when the acknowledgment arrives.
+That the buffers used to hold retained TPDUs are freed
+can be observed by
+running \fInetstat(8)\fR with the -m option
+on a quiescent system to observe the number of mbufs in use,
+then
+running a test with the
+\fBT.drop\fR debug option on to cause retransmission,
+and
+finally
+running netstat -m again after the test is over,
+to see that all the mbufs have been freed by TP.
+The actual retransmission activity can be observed in a trace
+with the
+\fBT.ndata, \fR
+\fBT.emit\fR and
+\fBT.input\fR trace options.
+The retransmission strategy to be used is controlled by the
+ARGO directory service.
+The statistics
+.(b
+.nf
+\fIn\fR DT (retransmissions)
+\fIn\fR XPD (retransmissions)
+\fIn\fR CR (retransmissions)
+\fIn\fR CC (retransmissions)
+\fIn\fR DR (retransmissions)
+.fi
+.)b
+.lp
+indicate the number of retransmissions that actually occurred.
+.sh 4 "Timers:"
+.pp
+The debug and trace option
+\fBT.timer\fR
+dumps information about the timers as they are set,
+as they are cancelled, and as they expire.
+The statistics
+.(b
+.nf
+\fIn\fR ticks
+\fIn\fR timers set
+\fIn\fR timers expired
+\fIn\fR timers cancelled
+\fIn\fR inactive timers cancelled
+\fIn\fR connections dropped due to retrans limit
+\fIn\fR CCs sent to zero dref
+.fi
+.)b
+.lp
+are printed for both the E-type timers and for the C-type timers.
+The effect of timers can be seen by observing retransmissions.
+Two simple ways to force retransmissions are:
+.np
+to use the \fBT.zdref\fR debug option,
+which causes CCs to contain a zero destination reference,
+so that connection establishment will time out, and
+.np
+to attempt to open a connection to a transport selector on which
+no process is listening.
+Either of these actions, along with the
+\fBT.connect\fR
+trace or debug option will permit
+observation of the timeout facilities.
+.sh 4 "TPDU creation and parsing:"
+.pp
+TPDUs are created for output in
+\fItp_emit()\fR.
+The
+\fBT.emit\fR
+trace
+option dumps TPDUs as they are transmitted.
+\fITp_input()\fR parses TPDUs on input.
+The
+\fBT.input\fR
+trace
+option dumps TPDUs as they are received.
+The debug options \fBT.emit\fR and \fBT.input\fR dump a lot of excess information
+to the console, and are used primarily for debugging
+extremely pathological behavior.
+.pp
+By tracing the execution of
+\fItpdiscard\fR or a simple file transfer,
+with a variety of protocol and service options,
+using the
+\fBT.connect,\fR
+\fBT.emit,\fR
+\fBT.input,\fR
+\fBT.ndata,\fR
+\fBT.aks,\fR
+\fBT.akr,\fR
+and
+\fBT.xpd\fR
+options,
+one can verify the correct placement of TPDU options
+on all types of TPDUs.
+The interesting statistics are
+.(b
+.nf
+\fIn\fR variable parameters ignored
+\fIn\fR invalid parameter codes
+\fIn\fR invalid parameter values
+\fIn\fR invalid dutypes
+\fIn\fR invalid negotiation failures
+\fIn\fR invalid destination references
+\fIn\fR invalid suffix parameters
+\fIn\fR invalid checksums
+\fIn\fR connections used extended format
+\fIn\fR connections allowed transport XPD
+\fIn\fR connections turned off checksumming
+\fIn\fR TP 4 connections
+\fIn\fR TP 0 connections
+.fi
+.)b
+.sh 4 "Separation and concatenation:"
+.pp
+Concatenation is not supported by this implementation.
+Separation is supported, however, and to test separation,
+some sort of concatenated TPDUs had to be created.
+The code for this is no longer supported.
+After testing locally with some temporary code to create concatenated
+TPDUs,
+the ARGO transport implementation was tested with another transport
+implementation that does generate concatenated TPDUs.
+The interesting statistics are:
+.(b
+.nf
+\fIn\fR concatenated TPDUs
+\fIn\fR TPDUs input
+.fi
+.)b
+.sh 4 "Length limits for TPDUs:"
+.pp
+Some TPDUs may take user data:
+the CR, CC, DR, DT, and XPD.
+All of these but the DT TPDU have limits to the amount
+of data they may carry.
+The limits are enforced for CR, CC, and DR TPDUs by
+\fItp_ctloutput()\fR,
+the routine that accepts data from the user.
+The limit for XPD TPDUs is enforced by
+\fItp_usrreq()\fR, which accepts expedited
+data from the user.
+To test the effectiveness of the checks on output, one may attempt
+to send expedited data with amounts larger than the limit (16 bytes).
+.pp
+On input the limits are checked in
+\fItp_input()\fR.
+To test the effectiveness of the checks on input, it was necessary
+to create an illegally large TPDU.
+The
+\fBT.badsize\fR
+debug option
+does this - it will turn a legitimate
+XPD TPDU into a XPD TPDU with 18 bytes
+of expedited data.
+The interesting statistics are:
+.(b
+.nf
+\fIn\fR illegally large XPD TPDU
+\fIn\fR invalid length
+.fi
+.)b
+.sh 4 "Frozen References:"
+.pp
+The key issue here is to see that references are not reassigned
+until the reference timer expires.
+This can be observed by watching the timer activity as described
+above and by observing the reference numbers chosen for sockets
+with the \fBT.emit\fR
+or \fBT.input\fR trace options, which trace the TPDU headers.
+.sh 4 "Inactivity Control:"
+.pp
+Inactivity control can be observed by turning on the trace options
+\fBT.aks\fR, \fBT.akr\fR and \fBT.emit\fR
+during a simple file transfer.
+In the middle of the transfer, if the sender process
+is stopped, both TP entities continue
+to send acknowledgments.
+This can be observed in the trace.
+If the file transfer is between machines, taking down one of the machines
+will cause the inactivity timer on the other machine to expire.
+The TP entity will respond to this by sending a DR TPDU
+to close the connection, which can be observed in the trace.
+The expiration of the timer can be observed in a trace if the
+\fBT.driver\fR option is used.
+This option traces all events and state changes in the
+TP
+finite state machine.
+.sh 4 "Connection establishment:"
+.pp
+The process of connection establishment can be observed with the
+\fBT.connect\fR
+trace and debug options, and
+the
+\fBT.driver \fR
+trace option.
+Various states of the connection establishment state machine
+may be observed with the
+debug option
+\fBT.zdref\fR.
+This option
+causes \fItp_input()\fR
+to change the foreign reference on an incoming CC TPDU to zero,
+eventually causing the CC TPDU to be dropped,
+retransmissions of the CC to occur,
+and the connection to time out before being established.
+The statistics of interest are:
+.(b
+.nf
+\fIn\fR CCs sent to zero dref
+\fIn\fR invalid dest refs
+\fIn\fR CC (received)
+\fIn\fR CC (sent)
+\fIn\fR CC (retransmitted)
+\fIn\fR (connections) timed out on retrans
+.fi
+.)b
+.sh 4 "Disconnection:"
+.pp
+Various states of the connection breakdown part of the state machine
+may be observed with the
+trace options
+\fBT.input\fR
+or
+\fBT.emit\fR,
+\fBT.driver\fR
+and
+running the
+discard or file transfer programs.
+.sh 4 "Association of TPDUs with Transport Connection:"
+.pp
+The problem of locating a transport connection
+given a TPDU is handled in
+\fItp_input()\fR in one of two ways.
+For an incoming CR TPDU, the transport suffix
+is used to locate a transport protocol
+control block (PCB), to which a transport
+connection is made by creating a new socket and PCB.
+For all other TPDU types, the destination reference is used
+to locate the appropriate transport connection.
+This is done by scanning the list of reference blocks.
+Debug and trace options
+were used to debug these sections of code but have since been
+removed due to their effect on the readability
+and maintainability of this code.
+The trace options
+\fBT.connect\fR
+and
+\fBT.newsock\fR
+create trace records that contain the address of the
+socket as well as that of the PCB
+when a socket is opened.
+When a TPDU arrives for a given socket,
+the trace records created by the
+\fBT.input \fR
+option will also contain the address of the PCB that is found
+in
+\fItp_input()\fR.
+These two addresses can be compared in the trace output to observe
+that the proper association is made.
+.sh 4 "Protocol Errors and the ER TPDU:"
+.pp
+Certain types of errors are intended to evoke the response
+from the TP entity of sending an ER or a DR TPDU.
+The routine
+\fItp_error_emit()\fR
+creates ER and DR TPDUs for this purpose.
+The debug and trace option
+\fBT.erroremit\fR
+dumps information about the activity of this routine.
+Since ER TPDUs are not generated under normal circumstances,
+the parsing of ER TPDUs was tested in this
+implementation by code that generated (illegitimate) ER TPDUs.
+This code was removed because it significantly complicated code maintenance.
+.sh 4 "User Interface:"
+.pp
+The debug and trace options
+\fBT.request\fR,
+\fBT.params,\fR
+\fBT.indication\fR
+and
+\fBT.syscall\fR
+dump information about the user interface.
+Most of the debugging code added to the socket-layer
+routines for debugging has since been removed so that
+the source (which is functionally unchanged from the 4.3 release)
+would not unnecessarily be changed.
+\fIRlogin.iso\fR, the TP version of remote login,
+exercises some of the original BSD data transfer system calls
+(\fIread()\fR and \fIwrite()\fR)
+rather than \fIsendv()\fR and \fIrecv()\fR.
+The interesting statistics are
+.(b
+.nf
+\fIn\fR EOT indications
+\fIn\fR user rcvd
+\fIn\fR connections used extended format
+\fIn\fR connections allowed transport XPD
+\fIn\fR connections turned off checksumming
+\fIn\fR TP 4 connections
+\fIn\fR TP 0 connections
+.fi
+.)b
diff --git a/share/doc/iso/wisc/def.nr b/share/doc/iso/wisc/def.nr
new file mode 100644
index 0000000..1e691bf
--- /dev/null
+++ b/share/doc/iso/wisc/def.nr
@@ -0,0 +1,144 @@
+.NC "Definitions"
+.sh 1 "General Terms"
+.ip "Kernel" 5
+The source code or binary module for the ACIS Operating System
+(also known as AOS and IBM/4.3).
+.ip "User process" 5
+An instance of a program that is
+running in unprivileged mode, in the unprivileged address space
+commonly known as "user address space", in other words, not
+part of the kernel.
+.ip "IPC" 5
+Interprocess communication, the mechanism by which two different
+user processes send messages to each other.
+.ip "Unix, AOS" 5
+ACIS Operating System, the IBM ACIS port of Berkeley Unix 4.3BSD.
+.ip "PCB, pcb" 5
+Protocol control block. Each instance of a protocol machine
+keeps status information, addresses, and in some cases queues
+in a pcb for each connection or socket.
+.ip "Domain" 5
+In the Berkeley Unix environment, a domain is an abstract entity which
+comprises a network architecture, addressing scheme, address format,
+network protocols, and transport protocols.
+.sh 1 "Transport Layer Terms"
+.ip "ISO 8073"
+ISO Draft International Standard 8073, Transport Protocol Specification
+.ip "TP" 5
+The collection of transport
+classes that have been implemented in ARGO, classes 0 and 4.
+Also means the ARGO implementation of TP.
+.ip "TP 0" 5
+Transport class 0.
+.ip "TP 4" 5
+Transport class 4.
+.ip "Transport entity" 5
+Software or hardware that implements the elements of procedure
+described in ISO 8073.
+.ip "Transport user" 5
+User process that makes use of the services
+provided by a transport entity.
+.ip "Transport service interface" 5
+The syntax and semantics of the set of procedures, functions, and system calls
+that are invoked by a transport user,
+through which the services of the transport entity are delivered.
+.ip "TPDU" 5
+Transport protocol data unit, a packet that is
+passed from one transport entity to another.
+.ip "TSDU" 5
+Transport service data unit, the logical unit of data that is
+passed from a transport entity to a transport user, or from
+a transport user to a transport entity.
+.ip "CR TPDU" 5
+Connection request TPDU.
+.ip "CC TPDU" 5
+Connection confirm TPDU.
+.ip "DR TPDU" 5
+Disconnect request TPDU.
+.ip "DC TPDU" 5
+Disconnect confirm TPDU.
+.ip "DT TPDU" 5
+Normal data TPDU.
+.ip "XPD TPDU" 5
+Expedited data TPDU.
+.ip "AK TPDU" 5
+Normal data acknowledgment TPDU.
+.ip "XAK TPDU" 5
+Expedited data acknowledgment TPDU.
+.ip "ER TPDU" 5
+Error TPDU.
+.sh 1 "Network Layer Terms"
+.ip "ISO 8473"
+ISO Draft International Standard 8473, connectionless network protocol.
+.ip "CONS"
+Connection Oriented Network Service.
+.ip "COSNS"
+Connection Oriented Sub-Network Service.
+.ip "CLNS"
+Connectionless Network Service.
+.ip "CLNP"
+Connectionless Network Protocol, or ISO 8473.
+.ip "Network Entity"
+Software or hardware that implements the elements of procedure described
+in ISO 8473.
+.ip "Network Service User"
+Software components that make use of the services provided by a network
+entity.
+.ip "Network Service Provider"
+Software components that provide the services of a network entity.
+.ip "NSAP"
+Network Service Access Point. The point at which the OSI network service
+is made available to the network service user by the network service
+provider.
+.ip "NSAP address"
+Information that the network service provider needs to identify an
+NSAP. The source and destination address fields of a CLNP packet
+are NSAP addresses.
+.ip "ES"
+End system. A system running the complete suite of OSI protocols which can
+act as an end point for communication.
+.ip "IS"
+Intermediate system. A system running the OSI layers 1, 2, and 3 which
+can act only as a packet router.
+.ip "SNPA"
+The Subnetwork Point of Attachment is the point where a \fIreal\fR
+end or intermediate system is attached to a \fIreal\fR subnetwork.
+.ip "SNPA address"
+Information that a \fIreal\fR subnetwork needs to identify a \fIreal\fR end
+or intermediate system. This is commonly referred to as the hardware address.
+.ip "NPDU"
+Network Protocol Data Unit. The unit of data which is exchanged between
+network entities.
+.ip "DT NPDU"
+Normal data NPDU.
+.ip "ER NPDU"
+Error report NPDU.
+.ip "Initial NPDU"
+A NPDU carrying the whole of the user data from an N-UNITDATA request.
+.ip "Derived NPDU"
+A NPDU whose fields are identical to those of an initial NPDU, except that it
+carries only a segment of the user data from an N-UNITDATA request.
+.ip "Segment"
+A distinct unit of data consisting of part or all of the user data provided
+in the N-UNITDATA request and delivered in the N-UNITDATA indication.
+.ip "Segmentation"
+The act of generating two or more derived NPDUs from an initial or derived
+NPDU.
+.ip "Fragment"
+A DoD Internet Protocol term with the same meaning as "segment". Used
+synonymously with "segment."
+.ip "Fragmentation"
+A DoD Internet Protocol term with the same meaning as "segmentation". Used
+synonymously with "segmentation."
+.ip "Reassembly"
+The act of regenerating an initial NPDU from two or more derived NPDUs.
+.ip "MTU"
+Maximum transmission unit. The maximum size of a packet that can be
+transmitted on a medium or through a protocol.
+For example, the MTU of the TP protocol is 8192 bytes, the MTU
+of an Ethernet device is 1500 bytes, and the MTU of the OSI Network
+service is 512 bytes.
+.ip "Network interface"
+The device used to attach a computer to a network, for example,
+an Ethernet adapter, or a Token Ring adapter.
+This unfortunate terminology is inherited from BSD Unix.
diff --git a/share/doc/iso/wisc/dogrn b/share/doc/iso/wisc/dogrn
new file mode 100755
index 0000000..83964ae
--- /dev/null
+++ b/share/doc/iso/wisc/dogrn
@@ -0,0 +1,6 @@
+#! /bin/csh -f
+set dev=fa
+foreach m ($argv)
+ echo grn -P$dev $m.grn ">" $m.nr
+ grn -P$dev $m.grn > $m.nr
+end
diff --git a/share/doc/iso/wisc/eicon.nr b/share/doc/iso/wisc/eicon.nr
new file mode 100644
index 0000000..64db4aa
--- /dev/null
+++ b/share/doc/iso/wisc/eicon.nr
@@ -0,0 +1,729 @@
+.sh 2 "X.25 Public Data Network Support"
+.pp
+This ARGO release includes support for an X.25 Public Data Network (PDN)
+in the form of a device driver for the Eicon Technology
+Network Adapter \**.
+.(f
+This adapter, its software, and its documentation are
+available from Eicon Technology Corporation, 3452 Ashby Street, Montreal,
+Quebec, Canada H4R 2C1.
+.)f
+The adapter and its software, together with
+the ARGO \fIecn(4)\fR driver, implement
+the X.25 packet layer protocol as required to support the OSI connection
+oriented network service.
+The remainder of this section of this manual
+describes the ARGO device driver (hereinafter called "the driver")
+for the Eicon Technology Network Adapter (hereinafter called "the adapter"),
+the interface between the driver
+and the CONS software described above, and the interface
+between the driver and the software on the adapter.
+.sh 3 "Software Modules"
+.lp
+The modules relevant to the design of the driver are listed below.
+.ip "\fICONS -\fR"
+The Connection Oriented Network Service (CONS) provides the upper ISO layers
+with an interface to the PDN.
+In this release,
+the PDN is 1980 X.25, although support for 1984 X.25 is included.
+CONS can receive requests
+from the CLNP entity and
+from the OSI transport entity.
+In addition, the CONS module
+supports \fIioctl()\fR commands used by
+\fIifconfig(8)\fR to configure
+the X.25 network address and to
+declare the adapter to be up or down.
+See \fIcons(4p)\fR.
+.ip "\fIDriver -\fR"
+The driver accepts commands from CONS, formats these commands
+for the adapter, and interprets error indications delivered by the adapter.
+This driver supports all the UNIX configuration device structures.
+See \fIecn(4)\fR.
+.ip "\fIEcnconf -\fR"
+\fIEcnconf\fR is a program that allows the privileged user to
+reconfigure
+the options offered by the software on the adapter.
+\fIEcnconf\fR can be run at any time.
+See \fIecnconf(8)\fR.
+.ip "\fIEcnload -\fR"
+\fIEcnload\fR is a program that downloads Eicon Technology software
+to the adapter and passes the configuration changes made
+with the \fIecnconf\fR program to the driver.
+\fIEcnload must be run only when the X.25 link is considered down\fR.
+See \fIecnload(8)\fR.
+.ip "\fIEcnstat -\fR"
+\fIEcnstat\fR is a program that
+prints the connection state information and counters kept by
+the adapter and by the driver.
+The statistics include the number of sends and receives,
+active connections, and errors.
+For more information, see \fIecnstat(8)\fR.
+.ip "\fIAdapter -\fR"
+The adapter's interface to the driver is the Network Control
+Block and Request Vector that exist within the adapter's shared memory (often
+called the "Common Data Area").
+This is described in detail below.
+.sh 3 "Interactions Among the Modules"
+.lp
+The commands
+passed between CONS and the driver can be any one of the \fIECN\fR
+commands outlined in the
+sections "ECN Requests" and "ECN Replies", below.
+CONS uses the \fCecnrestart()\fR procedure for restart requests,
+\fCecnshutdown()\fR procedure for shutdown requests, and
+\fCecnoutput()\fR procedure for all
+normal data transfer requests.
+CONS uses the \fCecnioctl()\fR procedure call for
+servicing adapter status requests from the X.25 statistics program \fIecnstat\fR.
+.lp
+Commands passed between the driver and the adapter can
+be any one of the network control block (\fINCB\fR)
+commands described in the section "NCB Commands", below.
+All commands to and from the adapter are
+communicated in the Network Control Block and Request Vector within
+the adapter's Common Data Area.
+.lp
+\fIEcnload\fR starts the
+Eicon Technology Network Adapter software on the adapter
+and downloads the validated configuration
+to the driver.
+.sh 3 "ECN Requests"
+.lp
+The \fIECN\fR request types that CONS can pass to the driver are listed below.
+.ip "\fIECN_STOP - (by calling ecnshutdown())\fR"
+This request instructs the driver to restart the network but not to listen for
+any incoming calls.
+CONS issues this request in response to an \fIioctl()\fR command
+issued by the utility program \fIifconfig\fR, when
+\fIifconfig\fR is used to bring down the adapter.
+.ip "\fIECN_RESTART - (by calling ecnrestart())\fR"
+This request instructs the driver to restart the network \fIand\fR to listen
+and accept any incoming calls.
+CONS issues this request in response to an \fIioctl()\fR command
+issued by the utility program \fIifconfig\fR, when
+\fIifconfig\fR is used to bring the adapter up.
+.ip "\fIECN_CALL - 0x90\fR"
+This request instructs the driver to place
+a call request
+to the specified DTE.
+.ip "\fIECN_CLEAR - 0x92\fR"
+This request instructs the driver to clear a given virtual circuit.
+All outbound data are acknowledged by the remote DTE
+before the circuit is cleared.
+.ip "\fIECN_SEND - 0x94\fR"
+This request instructs the driver to transmit a data buffer across a given
+virtual circuit.
+.ip "\fIECN_RESET - 0x04\fR"
+This request instructs the driver to reset the given virtual circuit and
+clear out all outstanding requests
+associated with that virtual circuit.
+.ip "\fIECN_STATUS - 0xb4 (exclusively through ecnioctl())\fR"
+This request instructs the driver to
+solicit the adapter's current connection state information and
+counters.
+.sh 3 "ECN Replies"
+.lp
+The \fIECN\fR responses
+the driver can give
+to CONS are listed below.
+.ip "\fIECN_CONNECT - 0x01\fR"
+This reply notifies CONS that the driver has established a virtual circuit
+connection initiated by the remote DTE.
+.ip "\fIECN_ACCEPT - 0x03\fR"
+This reply notifies CONS that an ECN_CALL request has succeeded. The
+reply contains a pointer to a protocol control block.
+.ip "\fIECN_REFUSE - 0x02\fR"
+This reply notifies CONS that a previous \fIECN_CALL\fR request has failed.
+The reply contains a pointer to a protocol control block.
+.ip "\fIECN_CLEAR - 0x92\fR"
+This reply notifies CONS that a given virtual circuit has been cleared
+either by the DCE or by the remote DTE.
+.ip "\fIECN_RECEIVE - 0x95\fR"
+This reply notifies CONS that the driver has received a data packet from
+the remote DTE.
+.ip "\fIECN_RESET - 0x04\fR"
+This reply notifies CONS that the virtual circuit has been reset either
+by the DCE or by the remote DTE.
+.ip "\fIECN_ACK - 0x05\fR"
+This reply tells CONS that the associated ECN_SEND request has been
+completed by the adapter.
+.sh 3 "NCB Commands"
+.lp
+The driver hides from the CONS module
+many of the idiosyncrasies of the adapter's
+software interface
+by mapping many of the above \fIECN\fR requests into corresponding
+\fINCB\fR commands. Below is a list of requests that the driver can place to
+the adapter. For each request that the driver places to the adapter, the adapter
+returns with a command completion.
+.ip "\fINCB_CALL - 0x90\fR"
+This command creates a virtual circuit.
+.ip "\fINCB_LISTEN - 0x91\fR"
+This command tells the adapter that our host is
+willing to accept incoming calls.
+.ip "\fINCB_CLEAR (and NCB_ABORT) - 0x92\fR"
+This command clears a virtual circuit. An option exists to clear the circuit
+immediately, without waiting first for outstanding acknowledgments.
+.ip "\fINCB_SEND (and NCB_RESET) - 0x94\fR"
+This command sends data to the remote DTE. An option is
+available for resetting the
+virtual circuit. This command can return a status indicating that the
+circuit has been cleared by the DCE or the remote DTE.
+.ip "\fINCB_RECEIVE - 0x95\fR"
+This command tells the adapter that our host is
+willing to receive data on a given virtual circuit. This command can return
+received data, a reset circuit, M-, D-, and Q-bits, interrupt packets,
+or a cleared circuit.
+.ip "\fINCB_STATUS - 0xb4\fR"
+This command queries the adapter about
+the status of a virtual circuit.
+The driver uses this command to support the ECN_STATUS request.
+.ip "\fINCB_RESTART - 0xb2\fR"
+This command restarts the network. This command requires that a corresponding
+configuration file be passed down to the adapter.
+.bp
+.sh 3 "ECN Request and Reply Structure"
+.lp
+Below is
+the data structure used in CONS-driver
+communications.
+This data structure is a parameter to the
+\fIecnoutput()\fR procedure.
+\fC
+.nf
+/* Eicon Driver Request Structure -- used between CONS and the driver */
+
+struct eicon_request {
+ struct ecn_ncb eicon_req_ncb; /* the network control block */
+ caddr_t eicon_req_pcb; /* CONS pcb used on CALL requests */
+ int eicon_req_state; /* used internally by the driver */
+ int eicon_retry_cnt; /* used internally by the driver */
+ int eicon_more; /* used internally by the driver */
+ u_char eicon_reason; /* source of CLEAR requests */
+};
+\fR
+.lp
+The \fCeicon_req_ncb\fR field in the eicon request structure is of
+type \fCecn_ncb\fR, defined in the following section. This structure stores
+the command block
+that the driver uses in communicating with the adapter.
+The command block contains a \fIlogical session number\fR (LSN),
+which identifies a virtual circuit.
+Requests such as ECN_CALL are made without an LSN to identify
+a circuit.
+When an LSN is not available, the request is identified by
+the field
+\fCeicon_req_pcb\fR, which is a pointer to a CONS protocol control block.
+The \fCeicon_req_state\fR field is used by the driver to keep track
+of the status of the given request.
+The following list defines the various values for this field:
+.ip "\fIREQ_NEW\fR"
+The driver recognizes a new request, has placed the request into the driver's
+own request queue, but has yet to interrupt the
+adapter. (The driver maintains a pointer \fCecn_pending_req\fR that indicates
+whether an interrupt to the adapter is outstanding. If one is outstanding, the
+driver places any new requests in this \fIREQ_NEW\fR state. If an interrupt
+is not
+outstanding, the driver places the request immediately in the
+\fIREQ_INTERRUPT\fR state defined below.)
+.ip "\fIREQ_INTERRUPT\fR"
+The driver has dequeued the CONS request, assigned \fCecn_pending_req\fR to
+point to the request, and
+interrupted the adapter for a chance to post this request.
+.ip "\fIREQ_POSTED\fR"
+The driver has sent the request to the adapter.
+.ip "\fIREQ_COMPLETE\fR"
+The driver has just completed the request, and if necessary, is now posting
+it to CONS.
+.lp
+The \fCeicon_retry_cnt\fR field in the eicon request structure keeps track
+of how many times the driver has tried posting this command to the adapter.
+After the second retry, the driver gives up and performs the appropriate
+error routine.
+The \fCeicon_more\fR field defines a \fIRECEIVE\fR request that
+has been re-posted to the adapter to take care of m-bit transfers.
+The \fCeicon_reason\fR field quantifies the reason for a connection being
+cleared. These reasons are defined in the include file \fCiso_errno.h\fR.
+.lp
+Any data associated with the request are linked to the request through the
+request mbuf's \fCm_next\fR field.
+This is done so that when
+the driver calls the \fIMFREE_M\fR deallocation routine, both the request
+and the data are freed together.
+.lp
+The following chart defines those fields within the eicon request structure
+that are relevant in any CONS request
+to the driver via the \fIecnoutput()\fR call.
+.sp
+.sz 8
+.TS
+center,box,tab(:);
+c s s s s
+c||c s s s
+c||c|c|c|c
+l||l|l|l|l.
+\fBField Definitions for CONS \(-> Driver Requests\fR
+_
+\fI:Request Types (CONS \(-> Driver)\fR
+\fIField:ECN_CALL:ECN_CLEAR:ECN_SEND:ECN_RESET\fR
+=
+\fIncb\(->command\fR:0x90:0x92:0x94:0x04
+_
+\fIncb\(->loc_ses_num\fR:T{
+.na
+leave as zero
+T}:VC #:VC #:VC #
+_
+\fIncb\(->info\fR:0x0:0x0:0x0:0x2
+_
+\fIeicon_req_pcb\fR:T{
+.na
+address of CONS' protocol control block
+T}:NULL:NULL:NULL
+_
+\fIeicon_req_data\fR:T{
+.na
+address of mbuf containing contents of Call Request packet (including DTE address, facilities, and call user data)
+T}:T{
+.na
+NULL or address of mbuf containing contents of Clear Request packet
+T}:T{
+.na
+address of mbuf containing contents of user data
+T}:T{
+.na
+NULL or the address of mbuf containing a one byte Reset Diagnostic code
+T}
+.TE
+.sz 10
+.sh 3 "Structure of the Network Control Block (NCB)"
+.lp
+The \fCecn_ncb\fR structure is used by the driver to
+make requests of the adapter.
+\fC
+.nf
+/* Network Control Block -- used between the driver and the Eicon adapter */
+
+struct ecn_ncb {
+ u_char command; /* command field */
+ u_char retcode; /* return code field */
+ u_char lsn; /* local session number */
+ u_char info; /* additional information */
+ caddr_t buffer; /* pointer to data buffer's mbuf */
+ u_short length; /* buffer length */
+ u_char callname[16]; /* module name on NA "X25" */
+ u_char appl_name[16]; /* application name */
+ u_char rxto; /* receive timeout in secs */
+ u_char txto; /* send(tx) timeout in secs */
+ caddr_t post; /* NULL */
+ u_char lana_num; /* specifies Eicon Tech NA */
+ u_char cmd_cplt; /* command status */
+ u_char reserve[14]; /* reserved area */
+};
+\fR
+.sp
+.lp
+The chart below defines those fields that are relevant in any
+reply passed by the driver back up to CONS.
+.sp
+.sz 7
+.TS
+center,box,tab(:);
+c s s s s s s
+c||c s s s s s
+c||c|c|c|c|c|c
+l||l|l|l|l|l|l.
+\fBField Definitions for Driver \(-> CONS Replies\fR
+_
+\fI:Reply Types (Driver \(-> CONS)\fR
+\fIField:ECN_CONNECT:ECN_ACCEPT:ECN_REFUSE:ECN_CLEAR:ECN_RECEIVE:ECN_RESET\fR
+=
+\fIncb\(->command\fR:0x01:0x03:0x02:0x92:0x95:0x04
+_
+\fIncb\(->loc_ses_num\fR:VC #:VC #:ignore:VC #:VC #:VC #
+_
+\fIncb\(->info\fR:ignore:ignore:ignore:ignore:T{
+.na
+Interrupt received (bit 0), D-bit set (bit 6), and/or Q-bit set (bit 7). Zero
+info field implies a normal receive.
+T}:ignore
+_
+\fIeicon_req_pcb\fR:NULL:T{
+.na
+address of CONS's protocol control block
+T}:T{
+.na
+address of CONS's protocol control block
+T}:ignore:ignore:ignore
+_
+\fIeicon_req_data\fR:T{
+.na
+NULL or address of mbuf containing contents of Call Indication packet
+T}:T{
+.na
+NULL or address of mbuf containing contents of Call Connected data
+T}:T{
+.na
+NULL or address of mbuf containing contents of Call Cleared data
+T}:T{
+.na
+NULL or address of mbuf containing contents of Call Cleared data
+T}:T{
+.na
+address of mbuf containing contents of user data
+T}:T{
+.na
+NULL or address of mbuf containing one byte Reset Diagnostic code
+T}
+_
+\fIeicon_reason\fR:ignore:ignore:T{
+.na
+reason for refusal
+T}:T{
+.na
+reason for clear
+T}:ignore:T{
+.na
+reason for reset
+T}
+.TE
+.sz 10
+.bp
+.sh 3 "Internal Driver Data Structures"
+.lp
+The main driver data structure
+is the \fIecn_softc\fR structure.
+This structure keeps track of the interface request queue
+(\fCecn_if\fR and \fCecn_pending_req\fR),
+magic addresses on the adapter (\fCecn_iom_base, ecn_mem_base,\fR and
+\fCecn_data_base\fR),
+error statistics (\fCecn_errors\fR), the state
+of each virtual circuit (\fCecn_vc_state\fR), the state of the \fILISTEN\fR
+request (\fCecn_listen_pending\fR), and the current caller (\fCecn_cause\fR).
+\fC
+.nf
+struct ecn_softc {
+ int ecn_errors[NCB_MAX][ST_MAX];
+ int ecn_cause[CAUSE_MAX]; /* ecn_work() causes */
+ struct mbuf *ecn_pending_req; /* waiting for command req */
+ char ecn_listen_pending; /* boolean = listen req pending? */
+ char ecn_vc_state[LSN_MAX]; /* the current state of each vc */
+ struct ecn_device
+ *ecn_iom_base; /* base address of io map */
+ struct ecn_request_vector
+ *ecn_mem_base; /* base address of memory map */
+ caddr_t ecn_data_base; /* base address for data area */
+ struct ifnet ecn_if; /* queue of new requests */
+}
+\fR
+.so figs/ecn_queue.nr
+.sh 2 "Queueing in the Driver"
+.lp
+.CF
+illustrates the queueing mechanism used by the driver.
+.lp
+CONS queues its data transfer requests at the end of the queue managed by
+the \fCecn_if\fR field in the \fCecn_softc\fR structure.
+At this point, each request has the state value of
+\fIREQ_NEW\fR.
+Once the driver notifies the adapter that it has a command to post,
+the driver dequeues the first request from the \fCecn_if\fR queue
+and sets the pointer
+\fCecn_pending_req\fR to point to the request.
+At this point, the request is in the \fIREQ_INTERRUPT\fR state.
+.lp
+Once the driver posts the request to the adapter, it
+dequeues the next request in the \fCecn_if\fR queue, reassigns the
+\fCecn_pending_req\fR pointer, and then indicates to the adapter
+that it is ready to post another request.
+The driver no longer has to keep track of the previous request,
+because for every reply, the adapter includes the associated
+mbuf pointer.
+While the request is outstanding, the request is in the \fIREQ_POSTED\fR state.
+.so figs/ecn_vc.nr
+.lp
+After the adapter completes the command, the driver may want to reply to CONS.
+It does this by placing its reply in CONS's \fCconsintrq\fR queue, defined as
+an external \fCifqueue\fR in the driver code.
+.sh 2 "Virtual Circuit States"
+.lp
+The \fCecn_vc_state\fR array in the \fCecn_softc\fR structure above keeps track
+of the state of each virtual circuit (VC).
+This is necessary to avoid handing
+the adapter any commands that may not apply during a given state.
+This mechanism
+is especially useful in dealing with unexpected aborts or clears where there
+is the potential for all outstanding commands to complete with errors.
+By changing
+states, the driver can prevent redundant commands (like clears and aborts)
+from being passed either to the adapter or to CONS.
+.lp
+The driver only keeps track of four different states, as illustrated in
+.CF
+.
+These states are:
+.ip "\fIVC_NO_CONNECTION\fR"
+When a virtual circuit is in this state, the virtual circuit does not exist.
+Only \fICALL\fR and \fILISTEN\fR commands are valid.
+.ip "\fIVC_DATA_XFER\fR"
+All commands, except \fICALL\fR and \fILISTEN\fR commands are valid once the
+connection exists.
+.ip "\fIVC_RESET_IN_PROGRESS\fR"
+In this state, either the driver has issued an \fINCB_RESET\fR or it has
+received a reset error code on the completion of a command.
+Only reissued \fIRESET\fR commands and \fIRECEIVE\fRs are
+valid.
+\fIRECEIVE\fR is valid in this state because the adapter uses the
+completion of this command to hand back the cause of the reset (the RESET
+INDICATION packet).
+.ip "\fIVC_CLEAR_IN_PROGRESS\fR"
+The driver has either issued an \fINCB_CLEAR\fR command or has just
+received a clear error code on the completion of a command.
+Within this state, only reissued
+\fICLEAR\fR and \fIABORT\fR commands are valid.
+.sh 2 "Error Statistics"
+.lp
+With the \fCecn_errors\fR field in the \fCecn_softc\fR structure,
+the driver maintains a two dimensional array of counters
+of the frequencies of errors.
+In order to inspect this array easily with
+the kernel debugger, the first index to every command ( <command, 0> ) is
+reserved for a four character ASCII command identifier.
+.bp
+.sh 3 "The Driver State Machine"
+.sh 2 "Handling of Normal Command Completions"
+.lp
+The chart below lists
+all the available adapter request types, at what level each of
+these requests can be used, options, and the driver's action after a normal
+completion of the command.
+.sp
+.sz 7
+.TS
+center,box,tab(:);
+c s s s
+c|c s|c
+c|c|c|c
+l|l|l|l.
+\fBNormal Completion Handling\fR
+_
+\fINCB:Options:Action Based on Normal Completion of\fR
+\fICommand:To Adapter:From Adapter:Driver\(->Adapter Command\fR
+=
+\fINCB_RESTART\fR:none:none:T{
+.na
+dequeue the request, and issue an NCB_LISTEN request to the adapter.
+T}
+_
+\fINCB_CALL\fR:none:connected:T{
+.na
+dequeue the request, pass an ECN_ACCEPT reply to CONS, and issue a RECEIVE to
+the adapter.
+T}
+_
+\fINCB_LISTEN\fR:T{
+.na
+use zero-length Call User Data and a zero-length Calling DTE address
+T}:none:T{
+.na
+dequeue the request, pass an ECN_CONNECT to CONS, and issue a RECEIVE to the
+adapter. Re-issue another NCB_LISTEN
+for another possible virtual circuit connection.
+T}
+_
+\fINCB_CLEAR\fR:T{
+.na
+normal clearing with all outstanding ACKs returned
+T}:none:T{
+.na
+dequeue the request.
+T}
+:_:_:_
+:T{
+.na
+immediate clearing
+T}:none:T{
+.na
+dequeue the request.
+T}
+_
+\fINCB_SEND\fR:T{
+.na
+normal send
+T}:none:T{
+.na
+dequeue the request and reply to CONS with an ECN_ACK.
+T}
+:_:_:_
+:T{
+.na
+reset the virtual circuit
+T}:none:T{
+dequeue the request.
+T}
+_
+\fINCB_RECEIVE\fR:none:T{
+.na
+normal, uncomplicated receive
+T}:T{
+.na
+dequeue the request and bcopy the data into the request's associated mbuf. Ship to CONS. Re-issue another NCB_RECEIVE.
+T}
+:_:_:_
+:none:T{
+.na
+m-bit set
+T}:T{
+.na
+same as above (adapter does the resegmentation automatically).
+T}
+:_:_:_
+:none:T{
+.na
+d-bit set
+T}:T{
+.na
+same as above.
+T}
+:_:_:_
+:none:T{
+.na
+q-bit set
+T}:T{
+.na
+same as above.
+T}
+:_:_:_
+:none:T{
+.na
+interrupt received
+T}:T{
+.na
+same as above.
+T}
+:_:_:_
+:none:T{
+.na
+reset received
+T}:T{
+dequeue the request, send an ECN_RESET back up to CONS, and issue another
+receive.
+T}
+.TE
+.sz 10
+.sp
+.uh "CONS \(-> Driver"
+.lp
+All entries in this column indicate that the CONS module can send this request
+down to the driver. Command names in parentheses define the mapping between
+the \fIECN\fR and \fINCB\fR commands.
+.uh "Driver \(-> Adapter"
+.lp
+All checks in this column indicate that the driver can send this request
+to the adapter. The last column in the above table defines what the driver must
+do upon normal completion of the command from the adapter.
+Note that not all driver-to-adapter
+commands have a CONS-to-driver equivalent.
+This shows that this
+command request is generated within the driver, rather than originating from
+the CONS driver.
+.uh "Driver \(-> CONS"
+.lp
+All entries in this column indicate that the driver can send this reply
+back to CONS. Command names in parentheses define the mapping between
+the \fIECN\fR and \fINCB\fR commands.
+.bp
+.sh 3 "Handling of Errors upon Command Completion"
+.lp
+Below are listed all the driver request and pseudo-request types, along with the
+actions the driver must perform given a command completion error delivered by
+the Eicon Network Adapter.
+.sp
+.sz 7
+.TS
+center,box,tab(:);
+c s s s s s s s
+c||c s s s s s s
+c||c|c|c|c|c|c|c
+c||c|c|c|c|c|c|c
+l||l|l|l|l|l|l|l.
+\fBError Completion Handling\fR
+_
+:\fIAction Based on Error Completion of Driver \(-> Adapter Command\fR
+\fIError Returned\fR:_:_:_:_:_:_:_
+\fI:NCB_CALL:NCB_LISTEN:NCB_CLEAR:NCB_ABORT:NCB_RESET:NCB_SEND:NCB_RECEIVE\fR
+=
+\fIST_BAD_LEN\fR:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>
+_
+\fIST_INVALID\fR:<soft-error>:<soft-error>:<dequeue>:<dequeue>:<dequeue>:<dequeue>:<dequeue>
+_
+\fIST_COMMAND_TO\fR:<retry>:<retry>:<retry>:<retry>:<abort>:<abort>:<retry>
+_
+\fIST_ISSUE_ANOTHER_RCV\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:T{
+.na
+requeue request and increment "more" count
+T}
+_
+\fIST_BAD_LSN\fR:<soft-error>:<soft-error>:<dequeue>:<dequeue>:<dequeue>:<dequeue>:<dequeue>
+_
+\fIST_NO_RESOURCES\fR:<retry>:<retry>:<retry>:<retry>:<abort>:<abort>:<retry>
+_
+\fIST_CALL_CLEARED\fR:<refuse>:<retry>:<retry>:<retry>:<clear>:<clear>:<clear>
+_
+\fIST_COMMAND_CANCELLED\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+_
+\fIST_NO_CIRCUITS\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+_
+\fIST_CALL_UNSUCCESSFUL\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+_
+\fIST_INCORRECT_CALLNAME\fR:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>:<soft-error>
+_
+\fIST_X25_RESET\fR:<refuse>:<retry>:<retry>:<retry>:<dequeue>:<dequeue>:<retry>
+_
+\fIST_TOO_MANY_COMMANDS\fR:<retry>:<retry>:<retry>:<retry>:<abort>:<abort>:<retry>
+_
+\fIST_L1_NO_DATA_SET_READY\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+_
+\fIST_L1_NO_CLEAR_TO_SEND\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+_
+\fIST_L1_NO_CLOCK\fR:<refuse>:<retry>:<retry>:<retry>:<abort>:<abort>:<abort>
+.TE
+.sz 10
+.sp
+.lp
+Each of the actions from the above chart is defined as follows.
+.ip "\fI<abort>\fR -"
+The driver should clear the connection by issuing an \fINCB_ABORT\fR
+to the adapter and sending an \fIECN_CLEAR\fR to CONS.
+.ip "\fI<refuse>\fR -"
+The driver should send an \fIECN_REFUSE\fR back to CONS.
+.ip "\fI<dequeue>\fR -"
+The driver should simply dequeue the request. Usually these errors occur when a
+reset or clear occurs on the adapter while the driver is in the midst of
+issuing the command which subsequently completes with an error status.
+.ip "\fI<clear>\fR -"
+The driver should send an \fIECN_CLEAR\fR back up to CONS.
+.ip "\fI<retry>\fR -"
+The driver should requeue the request if and only if the
+\fCeicon_retry_cnt\fR field in the request structure does not exceed the
+retry maximum.
+.ip "\fI<soft-error>\fR -"
+This action only takes place when a software error has occurred. The driver
+should
+print the error to the console in big bold letters and then panic.
+.bp
+.sh 3 "The IFP Flags"
+.lp
+The IFP flags in the standard \fCifnet\fR structure
+should be used in the following way.
+.ip "\fIIFF_UP on -\fR"
+This flag is set by the driver only after the procedure \fIecnrestart()\fR
+successfully completes.
+.ip "\fIIFF_UP off -\fR"
+This flag is set immediately upon entry into the procedure \fIecnshutdown()\fR.
+.ip "\fIIFF_RUNNING on -\fR"
+This flag is set on whenever the \fIecnwork()\fR procedure is active, i.e., the
+driver is actually doing something.
+.ip "\fIIFF_RUNNING off -\fR"
+This flag is turned off upon exit from the \fIecnwork()\fR procedure.
diff --git a/share/doc/iso/wisc/eicon.table5.1.orig.nr b/share/doc/iso/wisc/eicon.table5.1.orig.nr
new file mode 100644
index 0000000..78bda62
--- /dev/null
+++ b/share/doc/iso/wisc/eicon.table5.1.orig.nr
@@ -0,0 +1,126 @@
+.TS
+center,box,tab(:);
+c s s s s s s
+c|c s s|c s|c
+c|c|c|c|c|c|c
+l|c|c|c|l|l|l.
+\fBNormal Completion Handling\fR
+_
+\fINCB:Usage:Options:Action Based on Normal Completion of\fR
+\fICommand:CONS\(->Driver:Driver\(->Board:Driver\(->CONS:To Board:From Board:Driver\(->Board Command\fR
+=
+\fINCB_RESTART\fR:T{
+.na
+(ECN_RESTART)
+T}:\(sr::none:none:T{
+.na
+dequeue the request, and issue an NCB_LISTEN request to the board.
+T}
+_
+\fINCB_CALL\fR:(ECN_CALL):\(sr:T{
+.na
+(ECN_ACCEPT)
+T}:none:connected:T{
+.na
+dequeue the request, pass an ECN_ACCEPT reply to CONS, and issue a RECEIVE to
+the board.
+T}
+_
+\fINCB_LISTEN\fR::\(sr:T{
+.na
+(ECN_CONNECT)
+T}:T{
+.na
+use zero-length Call User Data and a zero-length Calling DTE address
+T}:none:T{
+.na
+dequeue the request, pass an ECN_CONNECT to CONS, and issue a RECEIVE to the
+board. Re-issue another NCB_LISTEN
+for another possible virtual circuit connection.
+T}
+_
+\fINCB_CLEAR\fR:(ECN_CLEAR):\(sr:(ECN_CLEAR):T{
+.na
+normal clearing with all outstanding ACKs returned
+T}:none:T{
+.na
+dequeue the request.
+T}
+:_:_:_:_:_:_
+::\(sr::T{
+.na
+immediate clearing
+T}:none:T{
+.na
+dequeue the request.
+T}
+_
+\fINCB_SEND\fR:(ECN_SEND):\(sr::T{
+.na
+normal send
+T}:none:T{
+.na
+dequeue the request and reply to CONS with an ECN_ACK.
+T}
+:_:_:_:_:_:_
+:T{
+.na
+(ECN_RESET)
+T}:\(sr::T{
+.na
+reset the virtual circuit
+T}:none:T{
+dequeue the request.
+T}
+_
+\fINCB_RECEIVE\fR::\(sr:(ECN_RECEIVE):none:T{
+.na
+normal, uncomplicated receive
+T}:T{
+.na
+dequeue the request and bcopy the data into the request's associated mbuf. Ship to CONS. Re-issue another NCB_RECEIVE.
+T}
+:_:_:_:_:_:_
+:::(ECN_RECEIVE):none:T{
+.na
+m-bit set
+T}:T{
+.na
+same as above (board does the resegmentation automatically).
+T}
+:_:_:_:_:_:_
+:::(ECN_RECEIVE):none:T{
+.na
+d-bit set
+T}:T{
+.na
+same as above.
+T}
+:_:_:_:_:_:_
+:::(ECN_RECEIVE):none:T{
+.na
+q-bit set
+T}:T{
+.na
+same as above.
+T}
+:_:_:_:_:_:_
+:::(ECN_RECEIVE):none:T{
+.na
+interrupt received
+T}:T{
+.na
+same as above.
+T}
+:_:_:_:_:_:_
+:::T{
+.na
+(ECN_RESET)
+T}:none:T{
+.na
+reset received
+T}:T{
+dequeue the request, send an ECN_RESET back up to CONS, and issue another
+receive.
+T}
+.TE
diff --git a/share/doc/iso/wisc/errors.nr b/share/doc/iso/wisc/errors.nr
new file mode 100644
index 0000000..51b5eb5
--- /dev/null
+++ b/share/doc/iso/wisc/errors.nr
@@ -0,0 +1,363 @@
+.\"$Header: errors.nr,v 1.2 88/12/06 16:06:07 nhall Exp $
+.\"$Source: /usr/argo/doc/kernel/RCS/errors.nr,v $
+.NC "Error Handling"
+This section describes the various ways that the ARGO kernel
+handles errors.
+For the purpose of this description,
+errors are divided into
+three classes: user errors, remote-end errors, and internal errors.
+These three classes of errors and the way
+the ARGO kernel handles them are described below.
+.sh 1 "Network Layer Errors"
+.pp
+The following section describes how errors are handled by CLNP.
+.sh 2 "User Errors"
+.pp
+User errors occur when attempting to send a CLNP packet. These errors
+are reflected back to the caller of \fIclnp_output()\fR as the return value
+of the function. The following table indicates the types of errors possible
+and their associated return codes:
+.(b L
+.TS
+tab(+), expand box;
+l l.
+Problem+Return Code
+=
+Unsupported option selected+EINVAL
+Incorrect address+ENAMETOOLONG
+Insufficient \fImbufs\fR+ENOBUFS
+Can't route packet+ENETUNREACH,EHOSTUNREACH
+Insufficient \fImbufs\fR+ENOBUFS
+.TE
+.)b
+.sh 2 "Remote-end Errors"
+.pp
+An error that occurs as the result of an incoming NPDU
+is a remote-end error.
+.pp
+In the case of CONS,
+the majority of these are addressing problems,
+PDN-generated errors (network or gateway congestion, number busy),
+or higher layer negotiation problems.
+All ISO 8208 diagnostic codes that may appear in a call clearing packet
+are passed up to the higher layer.
+Some of the higher layer protocols pass this error indication to the
+user level program as well.
+The CONS statistics that are maintained by the "glue" module
+include counters for each of the possible
+ISO 8208 diagnostic codes seen on incoming packets.
+In addition to these error codes, there are some codes that may appear
+due to device driver problems when an NPDU arrives, for example,
+the driver may run out of buffers.
+All possible errors that may occur in the CONS module are listed
+in the file
+\fC<netargo/iso_errno.h>\fR,
+and the values listed in this file are passed to the user level
+program in the global integer variable \fIerrno\fR.
+The ARGO library
+\fClibisodir.a\fR
+includes an expanded version of
+\fIperror()\fR that interprets these extra values.
+.pp
+In the case of CLNP,
+most remote-end errors are parsing errors.
+When a remote-end error is discovered, processing of the NPDU stops. The
+NPDU is discarded, and if error reporting is not disabled, an ER NPDU
+is sent back to the source of the offending packet. The following
+tables show the errors that may occur, and the error reason
+that will be specified when the ER NPDU is returned.
+.pp
+The following general errors may occur while parsing an NPDU:
+.(b L
+.TS
+tab(+), box, expand;
+l l.
+Problem+Error Reason
+=
+NPDU arrives before interface is configured+ADDR_DESTUNREACH
+Packet too short or too big+GEN_INCOMPLETE
+Protocol identification wrong+GEN_HDRSYNTAX
+Version wrong+DISC_UNSUPPVERS
+Lifetime expired+TTL_EXPTRANSIT
+Incorrect checksum+GEN_BADCSUM
+Address section too short+GEN_INCOMPLETE
+Segment section too short+GEN_INCOMPLETE
+Options section too short+GEN_INCOMPLETE
+Unknown packet type+GEN_HDRSYNTAX
+Can't route packet (forwarding)+ADDR_DESTUNREACH
+.TE
+.)b
+The following errors are related to options processing:
+.(b L
+.TS
+tab(+), box, expand;
+l l.
+Problem+Error Reason
+=
+Duplicate option+GEN_DUPOPT
+Unknown option+DISC_UNSUPPOPT
+Security format bad+GEN_HDRSYNTAX
+Security option present+DISC_UNSUPPSECURE
+Source route format bad+SRCRT_SYNTAX
+Record route too short+GEN_INCOMPLETE
+Record route format bad+GEN_HDRSYNTAX
+QOS format bad+GEN_HDRSYNTAX
+Priority format bad+GEN_HDRSYNTAX
+Error reason format bad+GEN_HDRSYNTAX
+Error reason on non-ER NPDU+DISC_UNSUPPOPT
+Error reason absent from ER NPDU+GEN_HDRSYNTAX
+.TE
+.)b
+.sh 2 "Internal Errors"
+.pp
+Internal errors occur as a result of a programmer error. These errors
+will result in a kernel \fIpanic()\fR. The following panics have been
+coded into CLNP:
+.(b L
+.TS
+tab(+), box, expand;
+l l.
+\fIPanic()\fR message+Reason
+=
+clnp_init: no raw clnp+The raw clnp protocol is not
++configured into the kernel.
+_
+clnp_srcaddr: ifp does not match interface+The ifp
++passed to \fIclnp_srcaddr()\fR is invalid.
+.TE
+.)b
+.sh 1 "Transport Layer Errors"
+.pp
+.sh 2 "User Errors"
+.pp
+TP handles these errors in the "standard"
+way for 4.3BSD:
+it causes an E\fIxxx\fR error constant (from the
+list in /sys/h/errno.h)
+to be put into the user program's
+global variable \fIerrno\fR.
+In most routines, in particular
+those routines called directly or indirectly
+by the system-call routines,
+this is done
+by simply returning
+this integer value.
+The errors that fall into this category are described
+in the following table:
+.(b L
+.TS
+expand box tab(+);
+l l.
+Error+Meaning
+=
+EAFNOSUPPORT+Attempting to use an address family
+ +other than AF_ISO and AF_INET.
+_
+ENOPROTOOPT+TP was not configured at boot time.
+_
+ESOCKTNOSUPPORT+The given socket type is not supported.
+_
+EPROTOTYPE+Attempting to use an inappropriate transport
+ +class for the network service. (e.g. class 0 over CLNS)
+ +or attempting to use an unknown network service.
+_
+EISCONN+Attempting to perform on a connected socket an action
+ +that is permitted only on unconnected sockets.
+_
+ENOTCONN+Attempting to perform on an unconnected socket an
+ +action that is permitted only on connected sockets.
+_
+EMSGSIZE+Trying to send more data than are permitted on
+ +connect, disconnect, or expedited data PDUs.
+_
+ENOTSOCK+The integer argument passed in the system
+ +call is not a socket descriptor or is a socket but
+ +has no transport pcb.
+_
+EINVAL+Some argument to the system call is invalid.
+_
+EOPNOTSUPP+Some command argument to the system call is invalid
+ +or the operation is not supported.
+_
+EACCES+An unprivileged user tried to use a privileged command.
+_
+ETOOMANYREFS+TP ran out of reference blocks.
+_
+ENOBUFS+TP ran out of memory (mbufs).
+.TE
+.)b
+Errors that should be reported to the user
+by \fIerrno\fR but which occur asynchronously
+are detected by the socket layer when the value
+of the field \fIso_error\fR in the socket
+structure is non-zero.
+This is used to report such errors as
+ECONNRESET,
+ECONNABORTED, and
+ETIMEDOUT, which are really remote-end errors.
+.sh 2 "Remote-end Errors"
+.pp
+An error that occurs as the result of a timer
+or is a result of an
+incoming TPDU
+is a remote-end error.
+The majority of these errors are parsing errors.
+They also include some protocol errors.
+Some of these errors cause the connection to be
+closed locally.
+It is unfortunate that when a connection is closed,
+the kernel will not permit the user program to perform
+anything on the socket in question, so the user cannot
+inquire about the reason for disconnection.
+There is no clean way to pass this information to a
+signal handler either, since the process being signalled
+may be swapped out at the time.
+Some of these errors cause TP to return an ER TPDU
+or a DR TPDU to the sending site.
+Some have no effect on the connection locally.
+These errors and their effects are described below.
+.(b L
+.TS
+expand box tab(+);
+l l l.
+Error+Meaning+Return code or action taken
+=
+Retransmission+The remote end has not responded +ETIMEDOUT
+timeout+to repeated attempts to send.+
+ +This can occur during connection+
+ +or after connection establishment.+
+_
+Inactivity+The remote end has not sent anything +ETIMEDOUT
+timeout+within the last \fIx\fR time, where+
++\fIx\fR is a locally defined+
++large value.+
+_
+Unacceptable+An unacceptable TPDU has arrived, and the+TPDU dropped
+TPDU +remote end can be identified.+possibly DR/ER returned
+_
+DR TPDU+A DR TPDU arrived, with any+Disconnect indication,
+arrived+value in the reason field.+so_error == ECONNRESET
+_
+ER TPDU+An ER TPDU arrived, with any+Disconnect indication,
+arrived+value in the reason field.+so_error == ECONNABORTED
+.TE
+.)b
+TPDUs may be unacceptable for a variety of reasons:
+.(b L
+.TS
+expand box tab(+);
+l l.
+Problem+Action taken by TP
+=
+No connection at destination+Respond with DR, reason: session entity
+reference or reference frozen+not attached to TSAP
+_
+Invalid destination reference+Respond with DR, reason: mismatched
++references
+_
+Invalid parameter code+Respond with ER, cause: inval. param. code
+_
+Invalid TPDU type+Respond with ER, cause: invalid TPDU type
+_
+Invalid version number+ Respond with ER, cause: inval. param. code
+_
+Invalid suffix value+Respond with ER, cause: inval. param. value
+_
+Suffix missing or is of+Respond with DR, reason:
+invalid length+header or parameter length invalid
+_
+Invalid checksum+packet discarded
+_
+Can't find a connection+Respond with DR, reason:
+for (dest ref, src ref) pair+mismatched references
+_
+Old ACK TPDU+packet discarded, possibly send ACK w/ FCC
+_
+Class requested isn't supported+Respond with DR, reason: negotiation failed
+_
+Invalid TPDU size parameter+Respond with ER, cause: inval. param. value
+_
+Illegal amount of data+Respond with DR, reason:
+on CR, CC, DR, or XPD+header or parameter length invalid
+_
+Header length and length+Respond with DR, reason:
+indicator field of TPDU don't agree+header or parameter length invalid
+.TE
+.)b
+.lp
+The file \fC<argo/iso_errno.h>\fR is a list
+of the error codes and diagnostics that can be returned
+from the peer transport entity in a DR TPDU or an ER TPDU,
+and those that can be returned from the CONS, initiated by the DCE,
+the remote DTE, or by the local network adapter.
+These error values are too numerous to list here.
+Most of them are taken from the ISO 8208 standard and the ISO 8073 standard.
+The ARGO distribution contains an expanded form of the BSD library
+routine \fIperror()\fR that prints an error message for a given
+\fIerrno\fR value.
+.sh 2 "Internal Errors"
+.pp
+Some internal errors are the result of
+a lack of resources such as buffers.
+These are reported to the user with the
+global variable
+\fIerrno\fR
+set to a value from
+\fC<errno.h>\fR.
+The errors that fall into this category are described
+in the following table:
+.(b L
+.TS
+expand box tab(+);
+l l.
+Return code+Problem
+=
+ENOBUFS+TP ran out of mbufs.
+_
+ENOPROTOOPT+TP hasn't been configured.
+_
+ETOOMANYREFS+TP ran out of (unfrozen) reference numbers.
+.TE
+.)b
+.pp
+Other
+internal errors are coding errors
+or errors of misinterpretation of a specification.
+They result in the printing of a message on the
+console followed by a system panic.
+The following panics have been coded into TP:
+.(b L
+.TS
+expand box tab(+);
+l l.
+\fIPanic()\fR message+Problem
+=
+tp_emit CR/CC+The length indicator field of a TPDU is longer than the
+ +amount of space in an mbuf; TP is attempting to send a
+ +CR TPDU that is too large (perhaps legal but too large for
+ +this implementation to manage).
+_
+tp_rcvoob: unexpected cluster+An incoming XPD TPDU was put into a cluster
+ +mbuf by a lower layer.
+_
+tp timeout table overflow+The system ran out of structures for TP timers.
+_
+tp: T_DETACH+The connected socket that is being detached has
+ +no parent socket.
+_
+tp_soisdisconnected+The socket head queue is
+ +corrupted.
+_
+tp_soisdisconnecting+The socket head queue is
+ +corrupted.
+_
+tpclnp_input: bad clnp_len +The length parameter passed by clnp
+ +is bad.
+_
+iso_control: SIOCDIFADDR+ioctl() system call passed down
+iso_control: SIOCSIFADDR+a null interface pointer
+_
+sofree dq+The list of socket structures
+ +is inconsistent.
+.TE
+.)b
diff --git a/share/doc/iso/wisc/esis_design.nr b/share/doc/iso/wisc/esis_design.nr
new file mode 100644
index 0000000..437f3ee
--- /dev/null
+++ b/share/doc/iso/wisc/esis_design.nr
@@ -0,0 +1,114 @@
+.NC "The Design of the ARGO Network Layer"
+.sh 1 "End System to Intermediate System Routing Protocol"
+.pp
+The following sections describe the design of the End System to Intermediate
+System (ES-IS) Routing Exchange Protocol.
+.sh 2 "Overview"
+.nf
+- protocol involves sending/receiving hello pdus.
+- timers determine
+ - when to send information
+ - when to discard information
+- want to keep as much of the work outside of kernel
+- only put functions and tables in kernel when necessary
+.sh 2 "Supported Features (brief overview of each)"
+- report configuration (both ES and IS)
+- record configuration (both ES and IS)
+- flush configuration (both ES and IS)
+- query configuration (ES only)
+- configuration response (ES only)
+- request redirect (IS only)
+- record redirect (ES only)
+- flush old redirect (ES only)
+- multicast vs. broadcast (using broadcast only)
+.sh 2 "Kernel Resident Features"
+.sh 3 "Support for PDU Transmission"
+- need mechanism to send/receive PDUs
+- utilize ES-IS socket (like raw socket)
+- socket(AF_ISO, SOCK_DGRAM, ISOPROTO_ESIS)
+.sh 4 "Sending PDUs"
+- sendmsg() used for transmitting PDUs
+- data will be pre-formed ES-IS PDU
+- no checks will be made on the pdu format
+- addr_snpa is the destination (to)
+- before sending, socket must be associated with a particular interface
+ this is done via setsockopt():
+ ESISOPT_SETIF - option
+ buffer is name of interface, ie. "un0"
+.sh 4 "Receiving PDUs"
+- recvmsg() used for receiving PDUs
+- data will be:
+ #define ESIS_PDU
+ #define ESIS_CONFIG_RESP
+ #define ESIS_REDIR_REQ
+ struct esis_indication {
+ short ei_type; /* type of indication */
+ union {
+ struct ? config_resp
+ struct ? redir_req
+ char pdu[0]
+ } ei_u;
+ }
+- no checks will be made on the pdu format
+- addr_snpa is the source (from)
+.sh 4 "Addressing"
+- ES-IS PDUs are sent to SNPA.
+- addresses used are SN addresses, not NSAP addresses
+- format of msg_name (part of msghdr) struct sockaddr_iso
+ afi = 0 /* means special snpa address */
+ isoa_u is struct addr_snpa
+ struct addr_snpa {
+ char sn_addr[7]; /* snpa addr */
+ }
+ isoa_len is number of bytes of sn_addr that are valid
+
+- sn_addr may be a unicast or multicast address
+- multicast addresses will be faked via broadcast addresses
+.sh 3 "NSAP to SNPA translation"
+- translation from NSAP to SNPA required for every CLNP PDU sent
+- function provided by iso_nsap_to_snpa
+
+ iso_nsap_to_snpa(ifp, m, nsap, snpa)
+ struct ifnet *ifp; /* outgoing interface */
+ struct mbuf *m; /* pkt */
+ struct sockaddr_iso *nsap; /* destination address */
+ char *snpa; /* RESULT: snpa address */
+ {
+ if (nsap.afi == AFI_SNPAADDR) {
+ copy snpa addr from nsap into snpa
+ return SUCCESS
+ } else {
+ scan RIB for (RIB.nsap == nsap)
+ if (found) {
+ copy RIB.snpa into snpa
+ return SUCCESS
+ }
+ scan RIB for (RIB.type == IS) && (RIB.ifp == ifp)
+ if (found) {
+ copy RIB.snpa into snpa
+ return SUCCESS
+ }
+ if (ifp allows multicast) {
+ /* invoke query configuration function */
+ copy ifp.ifaddr.ifa_all_es into snpa
+ return SUCCESS
+ }
+
+ return FAILURE
+ }
+ }
+
+- NSAP to SNPA table resides in kernel so CLNP has quick access
+- entries added/timed-out of table via user level ES-IS daemon
+.sh 3 "Query Configuration Function"
+- invoked when iso_nsap_to_snpa
+ - requires snpa, but
+ - does not find a match for dest nsap, and
+ - does not find a match for Any IS.
+- clnp packet is sent to address "all ES" as specified in ifnet structure
+ (for now, this is just the broadcast address)
+.sh 3 "Configuration Response Function"
+- invoked by clnp_input(), after determining that the packet received is
+ destined for one of its NSAPs
+- checks if sn_dst == "all ES" (for now, this is all hex ffs)
+- if true, a copy of packet is made, and passed up to esis_input()
diff --git a/share/doc/iso/wisc/figs/CONS_primitives.nr b/share/doc/iso/wisc/figs/CONS_primitives.nr
new file mode 100644
index 0000000..16dc3e0
--- /dev/null
+++ b/share/doc/iso/wisc/figs/CONS_primitives.nr
@@ -0,0 +1,77 @@
+.(b
+.TS
+tab(+) center expand box;
+c c
+a | a .
+service primitive & arguments+provided by
+=
+N_CONNECT.request+cons_openvc(... faddr, ...)
+called address+argument faddr
+calling address+not implemented
+receipt confirmation+not implemented
+expedited data+not implemented
+quality of service+not implemented
+NS-user data+not implemented
+_
+N_CONNECT.indication+not implemented
+_
+N_CONNECT.response+cons_netcmd( CONN_REFUSE )
++ or cons_netcmd( CONN_CONFIRM )
++ however, net connection has already
++ been accepted. If REFUSE, it will
++ be cleared with E_CO_HLI_REJT
++ (higher layer rejects connection)
+responding address+not implemented
+receipt confirmation+not implemented
+expedited data+not implemented
+quality of service+not implemented
+NS-user data+not implemented
+_
+N_CONNECT.confirm+not implemented
+=
+N_DATA.request+cons_output(... m, ...)
++and cosns_output(... m, ...)
+confirmation+not implemented
+data+mbuf chain m
+_
+N_DATA.indication+pr_input( m, ... )
++or software interrupt
+confirmation+not implemented
+data+mbuf chain
+_
+N_DATA_ACKNOWLEDGE.request+not implemented
+_
+N_DATA_ACKNOWLEDGE.indication+not implemented
+_
+N_EXPEDITED_DATA.request+not implemented
+_
+N_EXPEDITED_DATA.indication+not implemented
+=
+N_RESET.request+not implemented
+N_RESET.indication+socket->so_error = reason
++or pr_ctlinput( PRC_ROUTEDEAD )
+originator+not implemented
+reason+from X.25 packet or ecn driver
+N_RESET.response+not implemented
+N_RESET.confirm+not implemented
+=
+N_DISCONNECT.request+cons_netcmd( CONN_CLOSE )
+reason+uses E_CO_HLI_DISCN (normal
++disconnect from higher layer)
+responding address+not implemented
+NS_user data+not implemented
+_
+N_DISCONNECT.indication+socket->so_error = reason
++or pr_ctlinput( PRC_ROUTEDEAD )
+originator+not implemented
+reason+from X.25 packet or ecn driver
+responding address+not implemented
+NS_user data+not implemented
+.TE
+.(c
+\fBFigure \n+(FG\fR: Connection-Oriented Network Service Primitives
+.)c
+.)b
+.(f
+\** data on disconnect is not supported at this time.
+.)f
diff --git a/share/doc/iso/wisc/figs/Makefile b/share/doc/iso/wisc/figs/Makefile
new file mode 100644
index 0000000..72aa29a
--- /dev/null
+++ b/share/doc/iso/wisc/figs/Makefile
@@ -0,0 +1,18 @@
+#
+#
+.SUFFIXES: .nr .grn
+
+PRINTER = ba
+
+ALL = \
+ func_units.nr unix_ipc.nr osi_addr.nr trans_flow.nr clnp_output.nr\
+ clnp_input.nr mbufsnd.nr mbufrcv.nr\
+ ecn_vc.nr ecn_network.nr ecn_queue.nr tppt.nr
+
+all: $(ALL)
+
+clean:
+ rm $(ALL)
+
+.grn.nr:
+ grn -P$(PRINTER) $*.grn > $*.nr
diff --git a/share/doc/iso/wisc/figs/NS_primitives.nr b/share/doc/iso/wisc/figs/NS_primitives.nr
new file mode 100644
index 0000000..20dc226
--- /dev/null
+++ b/share/doc/iso/wisc/figs/NS_primitives.nr
@@ -0,0 +1,69 @@
+.(b
+.TS
+tab(+) center box;
+c c
+a | a .
+service primitive & arguments+kernel procedure call & arguments
+=
+N_CONNECT.request+\fIcons_openvc(copcb,dstaddr,so)\fR
+called address+argument \fIdstaddr\fR
+calling address, expedited data selection+not implemented
+receipt confirmation selection+not implemented
+quality of service, NS-user data+not implemented
+_
+N_CONNECT.indication+not implemented
+_
+N_CONNECT.response+not implemented
+_
+N_CONNECT.confirm+return from \fIcons_openvc()\fR
+responding address, quality of service+not implemented
+receipt confirmation selection+not implemented
+expedited data selection, NS-user data+not implemented
+=
+N_DATA.request+\fIcons_output(isop,m,len,isdgm)\fR, and
+ +\fIcosns_output(ifp,m,dstaddr)\fR
+NS-user data+argument m (mbuf chain)
+confirmation request+not implemented
+_
+N_DATA.indication+software interrupt (CLNP), procedure
++call to \fItp_input()\fR
+NS-user data+mbuf chain on \fIclnlintrq\fR or
++argument to \fItp_input()\fR
+confirmation request+not implemented
+=
+N_DATA_ACKNOWLEDGE.request+not implemented
+_
+N_DATA_ACKNOWLEDGE.indication+not implemented
+=
+N_EXPEDITED_DATA.request+not implemented
+_
+N_EXPEDITED_DATA.indication+not implemented
+=
+N_RESET.request+not implemented
+_
+N_RESET.response+not implemented
+_
+N_RESET.indication+higher layer \fIpr_ctlinput(
++PRC_ROUTEDEAD, faddr, copcb)\fR
+originator+argument \fIfaddr\fR
+reason+implemented with so->so_error for sockets
++that are attached to CONS PCBs
+_
+N_RESET.confirm+not implemented
+=
+N_DISCONNECT.request+\fIcons_netcmd(CONN_CLOSE,
++isop, channel, isdgm)\fR
+reason, NS-user data, responding address+not implemented
+_
+N_DISCONNECT.indication+higher layer \fIpr_ctlinput(
++PRC_ROUTEDEAD, faddr, copcb)\fR
+originator+argument \fIfaddr\fR
+reason+implemented with so->so_error for sockets
++that are attached to CONS PCBs
+NS-user data, responding address+not implemented
+.TE
+.(c
+\fBFigure \n+(FG\fR: Network Service Primitives
+.\")
+.)c
+.)b
diff --git a/share/doc/iso/wisc/figs/TS_primitives.nr b/share/doc/iso/wisc/figs/TS_primitives.nr
new file mode 100644
index 0000000..3d27df3
--- /dev/null
+++ b/share/doc/iso/wisc/figs/TS_primitives.nr
@@ -0,0 +1,60 @@
+.(b
+.TS
+center expand box;
+c c
+a | a .
+service primitive & arguments Unix system calls & arguments
+=
+T_CONNECT.request \fIsocket(), connect(), setsockopt()\fR
+called address \fIconnect()\fR argument
+calling address \fIconnect()\fR argument
+quality of service not implemented
+buffer management \fIsetsockopt()\fR argument
+security not implemented
+data \fIsetsockopt(), getsockopt()\fR
+_
+T_CONNECT.indication return from \fIaccept(); getsockopt()\fR
+called address \fIaccept()\fR argument
+calling address \fIaccept()\fR argument
+quality of service not implemented
+security not implemented
+data \fIsetsockopt(), getsockopt()\fR
+_
+T_CONNECT.response no applicable system calls
+_
+T_CONNECT.confirm return from \fIconnect()\fR
+quality of service \fIgetsockopt()\fR argument
+data	\fIsetsockopt(), getsockopt()\fR
+=
+T_DATA.request \fIrecvv(), sendv()\fR
+_
+T_DATA.indication return from \fIrecvv()\fR, \fIsendv()\fR, or \fIselect()\fR;
+ or signal SIGIO
+ ioctl(FIONREAD) tells how much has been
+ queued to read
+=
+T_EXPEDITED_DATA.request \fIsendv()\fR with MSG_OOB flag
+_
+T_EXPEDITED_DATA.indication SIGURG, \fIgetsockopt()\fR with TPFLAG_XPD,
+ return from \fIselect()\fR with exceptional
+ conditions mask
+=
+T_DISCONNECT.request \fIclose()\fR
+data \fIsetsockopt()\fR
+_
+T_DISCONNECT.indication SIGURG,
+ error return on other primitives
+reason errno
+data \fIgetsockopt()\**\fR
+=
+T_STATUS.request \fIgetsockopt()\fR, \fItpstat\fR utility program
+_
+T_STATUS.indication \fIgetsockopt()\fR, \fIselect()\fR, \fItpstat\fR
+.TE
+.(c
+\fBFigure \n+(FG\fR: Transport Service Primitives
+.)c
+.)b
+.(f
+\** data on disconnect is not supported at this time.
+.)f
diff --git a/share/doc/iso/wisc/figs/addrfmt.nr b/share/doc/iso/wisc/figs/addrfmt.nr
new file mode 100644
index 0000000..195a46e
--- /dev/null
+++ b/share/doc/iso/wisc/figs/addrfmt.nr
@@ -0,0 +1,22 @@
+.TS
+center,expand,box,tab(+);
+c s|c
+c|c|c.
+T{
+.na
+IDP: initial domain part
+T}+T{
+.na
+DSP: domain specific part
+T}
+_+_+
+T{
+.na
+AFI: authority and format identifier
+T}+T{
+.na
+IDI: initial domain identifier
+T}+
+.TE
+.ce
+\fB Figure \n+(FG\fR: Format of OSI addresses
diff --git a/share/doc/iso/wisc/figs/clnp_input.grn b/share/doc/iso/wisc/figs/clnp_input.grn
new file mode 100644
index 0000000..f217b94
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_input.grn
@@ -0,0 +1,18 @@
+.(z
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.4
+narrow 1
+medium 3
+thick 7
+pointscale off
+file clnp_input.gsrc
+.GE
+.ce
+\fB Figure \n+(FG:\fR Flow of control for processing CLNP NPDUs
+.)z
diff --git a/share/doc/iso/wisc/figs/clnp_input.gsrc b/share/doc/iso/wisc/figs/clnp_input.gsrc
new file mode 100644
index 0000000..0c0852e
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_input.gsrc
@@ -0,0 +1,338 @@
+gremlinfile
+0 424.00 24.00
+3
+424.00 696.00
+424.00 704.00
+-1.00 -1.00
+5 0
+0
+ 3
+312.00 416.00
+560.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 696.00
+125.00 701.00
+128.00 699.00
+131.00 701.00
+128.00 696.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 264.00
+560.00 264.00
+560.00 704.00
+128.00 704.00
+128.00 696.00
+-1.00 -1.00
+5 0
+0
+ 3
+424.00 648.00
+427.00 643.00
+424.00 645.00
+421.00 643.00
+424.00 648.00
+-1.00 -1.00
+4 0
+0
+ 3
+232.00 672.00
+288.00 672.00
+288.00 632.00
+424.00 632.00
+-1.00 -1.00
+4 0
+0
+ 3
+232.00 608.00
+424.00 608.00
+-1.00 -1.00
+4 0
+0
+ 3
+232.00 544.00
+424.00 544.00
+-1.00 -1.00
+4 0
+0
+ 3
+232.00 480.00
+424.00 480.00
+-1.00 -1.00
+4 0
+0
+ 3
+232.00 352.00
+424.00 352.00
+424.00 648.00
+-1.00 -1.00
+4 0
+0
+ 3
+351.00 689.00
+351.00 656.00
+528.00 656.00
+528.00 689.00
+351.00 689.00
+-1.00 -1.00
+5 0
+0
+ 0
+360.00 664.00
+360.00 679.00
+360.00 679.00
+360.00 679.00
+-1.00 -1.00
+1 2
+14 Discard Packet
+ 3
+136.00 320.00
+141.00 323.00
+139.00 320.00
+141.00 317.00
+136.00 320.00
+-1.00 -1.00
+5 0
+0
+ 3
+136.00 384.00
+240.00 384.00
+240.00 320.00
+136.00 320.00
+-1.00 -1.00
+5 0
+0
+ 0
+56.00 280.00
+56.00 295.00
+56.00 295.00
+56.00 295.00
+-1.00 -1.00
+1 2
+12 Process NPDU
+ 3
+48.00 304.00
+48.00 271.00
+225.00 271.00
+225.00 304.00
+48.00 304.00
+-1.00 -1.00
+5 0
+0
+ 0
+56.00 600.00
+56.00 615.00
+56.00 615.00
+56.00 615.00
+-1.00 -1.00
+1 2
+18 Consistency Checks
+ 3
+47.00 498.00
+47.00 465.00
+224.00 465.00
+224.00 498.00
+47.00 498.00
+-1.00 -1.00
+5 0
+0
+ 0
+56.00 344.00
+56.00 359.00
+56.00 359.00
+56.00 359.00
+-1.00 -1.00
+1 2
+20 Reassemble Fragments
+ 3
+168.00 432.00
+168.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 432.00
+48.00 400.00
+304.00 400.00
+304.00 432.00
+48.00 432.00
+-1.00 -1.00
+5 0
+0
+ 0
+200.00 408.00
+200.00 423.00
+200.00 423.00
+200.00 423.00
+-1.00 -1.00
+1 2
+12 Forward NPDU
+ 0
+56.00 408.00
+56.00 423.00
+56.00 423.00
+56.00 423.00
+-1.00 -1.00
+1 2
+9 Keep NPDU
+ 0
+56.00 472.00
+56.00 487.00
+56.00 487.00
+56.00 487.00
+-1.00 -1.00
+1 2
+15 Process Options
+ 0
+56.00 536.00
+56.00 551.00
+56.00 551.00
+56.00 551.00
+-1.00 -1.00
+1 2
+19 Extract Information
+ 0
+56.00 664.00
+56.00 679.00
+56.00 679.00
+56.00 679.00
+-1.00 -1.00
+1 2
+14 Dequeue Packet
+ 3
+131.00 311.00
+128.00 316.00
+131.00 314.00
+134.00 316.00
+131.00 311.00
+-1.00 -1.00
+5 0
+0
+ 3
+131.00 329.00
+131.00 310.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 332.00
+130.00 332.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 373.00
+127.00 378.00
+130.00 376.00
+133.00 378.00
+130.00 373.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 394.00
+130.00 373.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 460.00
+130.00 439.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 439.00
+127.00 444.00
+130.00 442.00
+133.00 444.00
+130.00 439.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 501.00
+127.00 506.00
+130.00 504.00
+133.00 506.00
+130.00 501.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 522.00
+130.00 501.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 588.00
+128.00 567.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 567.00
+125.00 572.00
+128.00 570.00
+131.00 572.00
+128.00 567.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 631.00
+125.00 636.00
+128.00 634.00
+131.00 636.00
+128.00 631.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 652.00
+128.00 631.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 368.00
+48.00 335.00
+225.00 335.00
+225.00 368.00
+48.00 368.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 562.00
+47.00 529.00
+224.00 529.00
+224.00 562.00
+47.00 562.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 626.00
+47.00 593.00
+224.00 593.00
+224.00 626.00
+47.00 626.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 689.00
+47.00 656.00
+224.00 656.00
+224.00 689.00
+47.00 689.00
+-1.00 -1.00
+5 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/clnp_input.nr b/share/doc/iso/wisc/figs/clnp_input.nr
new file mode 100644
index 0000000..01f6468
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_input.nr
@@ -0,0 +1,188 @@
+.(z
+.br
+.nr g1 3456u
+.nr g2 2964u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D't 1u'
+.sp -1
+.sp 101u
+\D'l 0u 223u'\D'l 1193u 0u'\D'l 0u -223u'\D'l -1193u 0u'
+.sp -1
+.sp 425u
+\D'l 0u 222u'\D'l 1193u 0u'\D'l 0u -222u'\D'l -1193u 0u'
+.sp -1
+.sp 431u
+\D'l 0u 222u'\D'l 1193u 0u'\D'l 0u -222u'\D'l -1193u 0u'
+.sp -1
+.sp 1306u
+\h'7u'\D'l 0u 222u'\D'l 1192u 0u'\D'l 0u -222u'\D'l -1192u 0u'
+.sp -1
+.sp -1912u
+\h'546u'\D'l 0u 141u'
+.sp -1
+.sp 141u
+\h'546u'\D'l -20u -34u'\D'l 20u 14u'\D'l 20u -14u'\D'l -20u 34u'
+.sp -1
+.sp 431u
+\h'546u'\D'l -20u -33u'\D'l 20u 13u'\D'l 20u -13u'\D'l -20u 33u'
+.sp -1
+.sp -141u
+\h'546u'\D'l 0u 141u'
+.sp -1
+.sp 444u
+\h'559u'\D'l 0u 141u'
+.sp -1
+.sp 141u
+\h'559u'\D'l -20u -34u'\D'l 20u 14u'\D'l 21u -14u'\D'l -21u 34u'
+.sp -1
+.sp 418u
+\h'559u'\D'l -20u -34u'\D'l 20u 13u'\D'l 21u -13u'\D'l -21u 34u'
+.sp -1
+.sp -142u
+\h'559u'\D'l 0u 142u'
+.sp -1
+.sp 445u
+\h'559u'\D'l 0u 141u'
+.sp -1
+.sp 141u
+\h'559u'\D'l -20u -33u'\D'l 20u 13u'\D'l 21u -13u'\D'l -21u 33u'
+.sp -1
+.sp 276u
+\h'559u'\D'l 0u 0u'
+.sp -1
+.sp 21u
+\h'566u'\D'l 0u 128u'
+.sp -1
+.sp 121u
+\h'566u'\D'l -20u -34u'\D'l 20u 14u'\D'l 20u -14u'\D'l -20u 34u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Dequeue Packet
+.sp -2377u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Extract Information
+.sp -1515u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Process Options
+.sp -1085u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Keep NPDU
+.sp -654u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Forward NPDU
+.sp -654u
+\h'1031u'\&\*(g9
+.sp |\n(g8u
+.sp -815u
+\h'7u'\D'l 0u 215u'\D'l 1725u 0u'\D'l 0u -215u'\D'l -1725u 0u'
+.sp -1
+\h'815u'\D'l 0u 215u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Reassemble Fragments
+.sp 593u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.sp -445u
+\D'l 0u 222u'\D'l 1193u 0u'\D'l 0u -222u'\D'l -1193u 0u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Consistency Checks
+.sp -686u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.sp 1307u
+\h'7u'\D'l 0u 222u'\D'l 1192u 0u'\D'l 0u -222u'\D'l -1192u 0u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Process NPDU
+.sp 162u
+\h'61u'\&\*(g9
+.sp |\n(g8u
+.sp -539u
+\h'600u'\D'l 700u 0u'\D'l 0u 431u'\D'l -700u 0u'
+.sp -1
+.sp 431u
+\h'600u'\D'l 33u -20u'\D'l -13u 20u'\D'l 13u 20u'\D'l -33u -20u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Discard Packet
+.sp -2316u
+\h'2109u'\&\*(g9
+.sp |\n(g8u
+.sp -2485u
+\h'2048u'\D'l 0u 223u'\D'l 1193u 0u'\D'l 0u -223u'\D'l -1193u 0u'
+.sp -1
+\D's 16u'
+.sp -1
+.sp 2270u
+\h'1246u'\D'l 1294u 0u'\D'l 0u -1993u'
+.sp -1
+.sp -863u
+\h'1246u'\D'l 1294u 0u'
+.sp -1
+.sp -430u
+\h'1246u'\D'l 1294u 0u'
+.sp -1
+.sp -431u
+\h'1246u'\D'l 1294u 0u'
+.sp -1
+.sp -431u
+\h'1246u'\D'l 378u 0u'\D'l 0u 269u'\D'l 916u 0u'
+.sp -1
+.sp 162u
+\h'2540u'\D'l 20u 33u'\D'l -20u -13u'\D'l -20u 13u'\D'l 20u -33u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp 2586u
+\h'546u'\D'l 2910u 0u'\D'l 0u -2964u'\D'l -2910u 0u'\D'l 0u 54u'
+.sp -1
+.sp -2910u
+\h'546u'\D'l -20u -34u'\D'l 20u 14u'\D'l 20u -14u'\D'l -20u 34u'
+.sp -1
+.sp 1886u
+\h'1785u'\D'l 1671u 0u'
+.sp -1
+.sp -1886u
+\h'2540u'\D'l 0u -54u'
+.sp -1
+.sp 2910u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG:\fR Flow of control for processing CLNP NPDUs
+.)z
diff --git a/share/doc/iso/wisc/figs/clnp_output.grn b/share/doc/iso/wisc/figs/clnp_output.grn
new file mode 100644
index 0000000..5025eee
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_output.grn
@@ -0,0 +1,18 @@
+.(z
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.4
+narrow 1
+medium 3
+thick 7
+pointscale off
+file clnp_output.gsrc
+.GE
+.ce
+\fB Figure \n+(FG:\fR Flow of control for emitting CLNP NPDUs
+.)z
diff --git a/share/doc/iso/wisc/figs/clnp_output.gsrc b/share/doc/iso/wisc/figs/clnp_output.gsrc
new file mode 100644
index 0000000..49a0186
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_output.gsrc
@@ -0,0 +1,376 @@
+gremlinfile
+0 528.00 32.00
+3
+528.00 688.00
+531.00 683.00
+528.00 685.00
+525.00 683.00
+528.00 688.00
+-1.00 -1.00
+5 0
+0
+ 3
+176.00 160.00
+176.00 144.00
+528.00 144.00
+528.00 688.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 672.00
+272.00 685.00
+272.00 685.00
+272.00 685.00
+-1.00 -1.00
+2 2
+6 EINVAL
+ 3
+240.00 672.00
+528.00 672.00
+-1.00 -1.00
+4 0
+0
+ 3
+128.00 652.00
+128.00 631.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 631.00
+125.00 636.00
+128.00 634.00
+131.00 636.00
+128.00 631.00
+-1.00 -1.00
+5 0
+0
+ 0
+64.00 672.00
+64.00 687.00
+64.00 687.00
+64.00 687.00
+-1.00 -1.00
+1 2
+15 Examine Options
+ 3
+47.00 689.00
+47.00 656.00
+224.00 656.00
+224.00 689.00
+47.00 689.00
+-1.00 -1.00
+5 0
+0
+ 0
+64.00 608.00
+64.00 623.00
+64.00 623.00
+64.00 623.00
+-1.00 -1.00
+1 2
+15 Check Addresses
+ 0
+64.00 546.00
+64.00 561.00
+64.00 561.00
+64.00 561.00
+-1.00 -1.00
+1 2
+20 Allocate Header mbuf
+ 0
+64.00 481.00
+64.00 496.00
+64.00 496.00
+64.00 496.00
+-1.00 -1.00
+1 2
+17 Create Fixed Part
+ 0
+64.00 417.00
+64.00 432.00
+64.00 432.00
+64.00 432.00
+-1.00 -1.00
+1 2
+12 Route Packet
+ 0
+64.00 352.00
+64.00 367.00
+64.00 367.00
+64.00 367.00
+-1.00 -1.00
+1 2
+19 Append Address Part
+ 0
+64.00 290.00
+64.00 305.00
+64.00 305.00
+64.00 305.00
+-1.00 -1.00
+1 2
+19 Append Options Part
+ 0
+64.00 225.00
+64.00 240.00
+64.00 240.00
+64.00 240.00
+-1.00 -1.00
+1 2
+13 Transmit NPDU
+ 0
+192.00 224.00
+192.00 239.00
+192.00 239.00
+192.00 239.00
+-1.00 -1.00
+1 2
+13 Fragment NPDU
+ 3
+47.00 625.00
+47.00 592.00
+224.00 592.00
+224.00 625.00
+47.00 625.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 562.00
+47.00 529.00
+224.00 529.00
+224.00 562.00
+47.00 562.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 498.00
+47.00 465.00
+224.00 465.00
+224.00 498.00
+47.00 498.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 433.00
+48.00 400.00
+225.00 400.00
+225.00 433.00
+48.00 433.00
+-1.00 -1.00
+5 0
+0
+ 3
+47.00 368.00
+47.00 335.00
+224.00 335.00
+224.00 368.00
+47.00 368.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 304.00
+48.00 271.00
+225.00 271.00
+225.00 304.00
+48.00 304.00
+-1.00 -1.00
+5 0
+0
+ 3
+49.00 240.00
+49.00 209.00
+306.00 209.00
+306.00 240.00
+49.00 240.00
+-1.00 -1.00
+5 0
+0
+ 3
+167.00 239.00
+167.00 209.00
+-1.00 -1.00
+1 0
+0
+ 3
+128.00 588.00
+128.00 567.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 567.00
+125.00 572.00
+128.00 570.00
+131.00 572.00
+128.00 567.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 503.00
+125.00 508.00
+128.00 506.00
+131.00 508.00
+128.00 503.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 524.00
+128.00 503.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 458.00
+130.00 437.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 437.00
+127.00 442.00
+130.00 440.00
+133.00 442.00
+130.00 437.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 375.00
+127.00 380.00
+130.00 378.00
+133.00 380.00
+130.00 375.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 396.00
+130.00 375.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 330.00
+130.00 309.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 309.00
+127.00 314.00
+130.00 312.00
+133.00 314.00
+130.00 309.00
+-1.00 -1.00
+5 0
+0
+ 3
+130.00 268.00
+130.00 268.00
+-1.00 -1.00
+5 0
+0
+ 3
+131.00 265.00
+131.00 246.00
+-1.00 -1.00
+5 0
+0
+ 3
+131.00 247.00
+128.00 252.00
+131.00 250.00
+134.00 252.00
+131.00 247.00
+-1.00 -1.00
+5 0
+0
+ 3
+146.00 320.00
+242.00 320.00
+242.00 255.00
+146.00 255.00
+-1.00 -1.00
+5 0
+0
+ 3
+146.00 255.00
+151.00 258.00
+149.00 255.00
+151.00 252.00
+146.00 255.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 608.00
+272.00 621.00
+272.00 621.00
+272.00 621.00
+-1.00 -1.00
+2 2
+12 ENAMETOOLONG
+ 0
+273.00 545.00
+273.00 558.00
+273.00 558.00
+273.00 558.00
+-1.00 -1.00
+2 2
+7 ENOBUFS
+ 0
+272.00 417.00
+272.00 430.00
+272.00 430.00
+272.00 430.00
+-1.00 -1.00
+2 2
+25 ENETUNREACH, EHOSTUNREACH
+ 0
+272.00 289.00
+272.00 302.00
+272.00 302.00
+272.00 302.00
+-1.00 -1.00
+2 2
+7 ENOBUFS
+ 3
+241.00 601.00
+529.00 601.00
+-1.00 -1.00
+4 0
+0
+ 3
+240.00 536.00
+530.00 536.00
+-1.00 -1.00
+4 0
+0
+ 3
+241.00 413.00
+527.00 413.00
+-1.00 -1.00
+4 0
+0
+ 3
+233.00 288.00
+529.00 288.00
+-1.00 -1.00
+4 0
+0
+ 0
+152.00 175.00
+152.00 190.00
+152.00 190.00
+152.00 190.00
+-1.00 -1.00
+1 2
+7 SUCCESS
+ -1
diff --git a/share/doc/iso/wisc/figs/clnp_output.nr b/share/doc/iso/wisc/figs/clnp_output.nr
new file mode 100644
index 0000000..b11d465
--- /dev/null
+++ b/share/doc/iso/wisc/figs/clnp_output.nr
@@ -0,0 +1,233 @@
+.(z
+.br
+.nr g1 3456u
+.nr g2 3891u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "SUCCESS
+.sp 3670u
+\h'750u'\&\*(g9
+.sp |\n(g8u
+\D's 16u'\D't 1u'
+.sp -1
+.sp 2863u
+\h'1328u'\D'l 2114u 0u'
+.sp -1
+.sp -893u
+\h'1385u'\D'l 2043u 0u'
+.sp -1
+.sp -877u
+\h'1378u'\D'l 2071u 0u'
+.sp -1
+.sp -464u
+\h'1385u'\D'l 2057u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "ENOBUFS
+.sp 2227u
+\h'1607u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "ENETUNREACH, EHOSTUNREACH
+.sp 1313u
+\h'1607u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "ENOBUFS
+.sp 400u
+\h'1614u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "ENAMETOOLONG
+.sp -50u
+\h'1607u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 2469u
+\h'707u'\D'l 36u -21u'\D'l -15u 21u'\D'l 15u 22u'\D'l -36u -22u'
+.sp -1
+.sp -464u
+\h'707u'\D'l 686u 0u'\D'l 0u 464u'\D'l -686u 0u'
+.sp -1
+.sp 522u
+\h'600u'\D'l -22u -36u'\D'l 22u 14u'\D'l 21u -14u'\D'l -21u 36u'
+.sp -1
+.sp -129u
+\h'600u'\D'l 0u 136u'
+.sp -1
+.sp -21u
+\h'593u'\D'l 0u 0u'
+.sp -1
+.sp -293u
+\h'593u'\D'l -22u -36u'\D'l 22u 14u'\D'l 21u -14u'\D'l -21u 36u'
+.sp -1
+.sp -150u
+\h'593u'\D'l 0u 150u'
+.sp -1
+.sp -471u
+\h'593u'\D'l 0u 150u'
+.sp -1
+.sp 150u
+\h'593u'\D'l -22u -36u'\D'l 22u 14u'\D'l 21u -14u'\D'l -21u 36u'
+.sp -1
+.sp -443u
+\h'593u'\D'l -22u -36u'\D'l 22u 14u'\D'l 21u -14u'\D'l -21u 36u'
+.sp -1
+.sp -150u
+\h'593u'\D'l 0u 150u'
+.sp -1
+.sp -470u
+\h'578u'\D'l 0u 149u'
+.sp -1
+.sp 149u
+\h'578u'\D'l -21u -36u'\D'l 21u 14u'\D'l 22u -14u'\D'l -22u 36u'
+.sp -1
+.sp -456u
+\h'578u'\D'l -21u -36u'\D'l 21u 14u'\D'l 22u -14u'\D'l -22u 36u'
+.sp -1
+.sp -150u
+\h'578u'\D'l 0u 150u'
+.sp -1
+\D's 4u'
+.sp -1
+.sp 2491u
+\h'857u'\D'l 0u 214u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -7u
+\h'14u'\D'l 0u 221u'\D'l 1835u 0u'\D'l 0u -221u'\D'l -1835u 0u'
+.sp -1
+.sp -457u
+\h'7u'\D'l 0u 235u'\D'l 1264u 0u'\D'l 0u -235u'\D'l -1264u 0u'
+.sp -1
+.sp -457u
+\D'l 0u 235u'\D'l 1264u 0u'\D'l 0u -235u'\D'l -1264u 0u'
+.sp -1
+.sp -465u
+\h'7u'\D'l 0u 236u'\D'l 1264u 0u'\D'l 0u -236u'\D'l -1264u 0u'
+.sp -1
+.sp -464u
+\D'l 0u 236u'\D'l 1264u 0u'\D'l 0u -236u'\D'l -1264u 0u'
+.sp -1
+.sp -456u
+\D'l 0u 236u'\D'l 1264u 0u'\D'l 0u -236u'\D'l -1264u 0u'
+.sp -1
+.sp -450u
+\D'l 0u 236u'\D'l 1264u 0u'\D'l 0u -236u'\D'l -1264u 0u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Fragment NPDU
+.sp 2863u
+\h'1035u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Transmit NPDU
+.sp 2856u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Append Options Part
+.sp 2392u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Append Address Part
+.sp 1949u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Route Packet
+.sp 1485u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Create Fixed Part
+.sp 1028u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Allocate Header mbuf
+.sp 565u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Check Addresses
+.sp 122u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.sp -457u
+\D'l 0u 236u'\D'l 1264u 0u'\D'l 0u -236u'\D'l -1264u 0u'
+.sp -1
+.ft R
+.ps 10
+.nr g8 \n(.d
+.ds g9 "Examine Options
+.sp 122u
+\h'121u'\&\*(g9
+.sp |\n(g8u
+.sp 415u
+\h'578u'\D'l -21u -36u'\D'l 21u 14u'\D'l 22u -14u'\D'l -22u 36u'
+.sp -1
+.sp -150u
+\h'578u'\D'l 0u 150u'
+.sp -1
+\D's 16u'
+.sp -1
+.sp -143u
+\h'1378u'\D'l 2057u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "EINVAL
+\h'1607u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 3655u
+\h'921u'\D'l 0u 114u'\D'l 2514u 0u'\D'l 0u -3883u'
+.sp -1
+.sp -3769u
+\h'3435u'\D'l 21u 35u'\D'l -21u -14u'\D'l -22u 14u'\D'l 22u -35u'
+.sp -1
+.sp 3883u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG:\fR Flow of control for emitting CLNP NPDUs
+.)z
diff --git a/share/doc/iso/wisc/figs/ecn_network.grn b/share/doc/iso/wisc/figs/ecn_network.grn
new file mode 100644
index 0000000..d58c5d8
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_network.grn
@@ -0,0 +1,19 @@
+.(z
+.hl
+.GS C
+width 6.0
+high 4.0
+1 6
+2 8
+3 10
+4 12
+sc 0.5
+narrow 1
+medium 3
+thick 7
+pointscale off
+file ecn_network.gsrc
+.GE
+.ce
+\fBFigure \n+(FG:\fR The X.25 Network Interface
+.)z
diff --git a/share/doc/iso/wisc/figs/ecn_network.gsrc b/share/doc/iso/wisc/figs/ecn_network.gsrc
new file mode 100644
index 0000000..1772b1d
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_network.gsrc
@@ -0,0 +1,288 @@
+gremlinfile
+0 370.00 354.00
+0
+370.00 346.00
+370.00 361.00
+370.00 361.00
+370.00 361.00
+-1.00 -1.00
+1 2
+10 (/dev/bus)
+ 0
+360.00 361.00
+360.00 376.00
+360.00 376.00
+360.00 376.00
+-1.00 -1.00
+1 2
+13 X.25 download
+ 0
+558.00 524.00
+558.00 539.00
+558.00 539.00
+558.00 539.00
+-1.00 -1.00
+1 2
+13 configuration
+ 0
+558.00 534.00
+558.00 549.00
+558.00 549.00
+558.00 549.00
+-1.00 -1.00
+1 2
+8 updated
+ 0
+358.00 459.00
+358.00 474.00
+358.00 474.00
+358.00 474.00
+-1.00 -1.00
+1 2
+12 (/dev/kmem1)
+ 0
+362.00 474.00
+362.00 489.00
+362.00 489.00
+362.00 489.00
+-1.00 -1.00
+1 2
+13 configuration
+ 0
+248.00 399.00
+248.00 414.00
+248.00 414.00
+248.00 414.00
+-1.00 -1.00
+1 2
+22 Data Area on the board
+ 0
+248.00 409.00
+248.00 424.00
+248.00 424.00
+248.00 424.00
+-1.00 -1.00
+1 2
+22 to and from the Common
+ 0
+245.00 507.00
+245.00 522.00
+245.00 522.00
+245.00 522.00
+-1.00 -1.00
+1 2
+10 ecnioctl()
+ 0
+245.00 516.00
+245.00 531.00
+245.00 531.00
+245.00 531.00
+-1.00 -1.00
+1 2
+12 ecnrestart()
+ 0
+245.00 524.00
+245.00 539.00
+245.00 539.00
+245.00 539.00
+-1.00 -1.00
+1 2
+13 ecnshutdown()
+ 0
+176.00 419.00
+176.00 434.00
+176.00 434.00
+176.00 434.00
+-1.00 -1.00
+1 2
+34 INTERFACES: the NCB command loaded
+ 0
+175.00 532.00
+175.00 547.00
+175.00 547.00
+175.00 547.00
+-1.00 -1.00
+1 2
+23 INTERFACES: ecnoutput()
+ 0
+42.00 415.00
+42.00 430.00
+42.00 430.00
+42.00 430.00
+-1.00 -1.00
+1 2
+15 COMMANDS: NCB_*
+ 0
+42.00 527.00
+42.00 542.00
+42.00 542.00
+42.00 542.00
+-1.00 -1.00
+1 2
+15 COMMANDS: ECN_*
+ 3
+546.00 511.00
+553.00 494.00
+560.00 511.00
+-1.00 -1.00
+5 0
+0
+ 3
+287.00 364.00
+270.00 357.00
+287.00 348.00
+-1.00 -1.00
+5 0
+0
+ 3
+287.00 477.00
+271.00 469.00
+287.00 461.00
+-1.00 -1.00
+5 0
+0
+ 3
+151.00 397.00
+159.00 382.00
+167.00 398.00
+-1.00 -1.00
+5 0
+0
+ 3
+151.00 431.00
+159.00 445.00
+167.00 431.00
+-1.00 -1.00
+5 0
+0
+ 3
+153.00 510.00
+160.00 494.00
+168.00 510.00
+-1.00 -1.00
+5 0
+0
+ 3
+152.00 540.00
+160.00 558.00
+167.00 540.00
+-1.00 -1.00
+5 0
+0
+ 3
+272.00 469.00
+492.00 469.00
+-1.00 -1.00
+5 0
+0
+ 3
+271.00 357.00
+552.00 357.00
+552.00 446.00
+-1.00 -1.00
+5 0
+0
+ 3
+553.00 557.00
+553.00 494.00
+-1.00 -1.00
+5 0
+0
+ 3
+159.00 445.00
+159.00 381.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 557.00
+160.00 494.00
+-1.00 -1.00
+5 0
+0
+ 0
+517.00 458.00
+517.00 472.00
+517.00 472.00
+517.00 472.00
+-1.00 -1.00
+1 3
+8 %ecnload
+ 0
+514.00 570.00
+514.00 584.00
+514.00 584.00
+514.00 584.00
+-1.00 -1.00
+1 3
+8 %ecnconf
+ 0
+115.00 347.00
+115.00 366.00
+115.00 366.00
+115.00 366.00
+-1.00 -1.00
+1 4
+11 EICON Board
+ 0
+114.00 458.00
+114.00 477.00
+114.00 477.00
+114.00 477.00
+-1.00 -1.00
+1 4
+11 UNIX Driver
+ 0
+133.00 569.00
+133.00 588.00
+133.00 588.00
+133.00 588.00
+-1.00 -1.00
+1 4
+4 CONS
+ 3
+493.00 445.00
+493.00 493.00
+608.00 493.00
+608.00 445.00
+493.00 445.00
+-1.00 -1.00
+5 0
+0
+ 3
+493.00 557.00
+493.00 605.00
+608.00 605.00
+608.00 557.00
+493.00 557.00
+-1.00 -1.00
+5 0
+0
+ 3
+63.00 332.00
+63.00 381.00
+272.00 381.00
+272.00 332.00
+63.00 332.00
+-1.00 -1.00
+5 0
+0
+ 3
+63.00 445.00
+63.00 494.00
+272.00 494.00
+272.00 445.00
+63.00 445.00
+-1.00 -1.00
+5 0
+0
+ 3
+63.00 557.00
+63.00 606.00
+272.00 606.00
+272.00 557.00
+63.00 557.00
+-1.00 -1.00
+5 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/ecn_network.nr b/share/doc/iso/wisc/figs/ecn_network.nr
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_network.nr
diff --git a/share/doc/iso/wisc/figs/ecn_queue.grn b/share/doc/iso/wisc/figs/ecn_queue.grn
new file mode 100644
index 0000000..5a9824d
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_queue.grn
@@ -0,0 +1,19 @@
+.(z
+.hl
+.GS C
+width 6.0
+high 3.0
+1 5
+2 7
+3 9
+4 12
+sc 0.5
+narrow 1
+medium 3
+thick 7
+pointscale off
+file ecn_queue.gsrc
+.GE
+.ce
+\fBFigure \n+(FG:\fR Queue Placement Strategy
+.)z
diff --git a/share/doc/iso/wisc/figs/ecn_queue.gsrc b/share/doc/iso/wisc/figs/ecn_queue.gsrc
new file mode 100644
index 0000000..81e3e07
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_queue.gsrc
@@ -0,0 +1,371 @@
+gremlinfile
+0 98.00 422.00
+3
+98.00 278.00
+562.00 278.00
+-1.00 -1.00
+4 0
+0
+ 3
+425.00 557.00
+577.00 557.00
+-1.00 -1.00
+4 0
+0
+ 3
+341.00 528.00
+425.00 528.00
+-1.00 -1.00
+5 0
+0
+ 0
+457.00 536.00
+457.00 549.00
+457.00 549.00
+457.00 549.00
+-1.00 -1.00
+2 3
+10 ECN driver
+ 0
+321.00 419.00
+321.00 432.00
+321.00 432.00
+321.00 432.00
+-1.00 -1.00
+2 2
+20 Driver->CONS replies
+ 0
+356.00 584.00
+356.00 599.00
+356.00 599.00
+356.00 599.00
+-1.00 -1.00
+1 2
+8 x25intrq
+ 0
+457.00 253.00
+457.00 266.00
+457.00 266.00
+457.00 266.00
+-1.00 -1.00
+2 3
+16 EICON X.25 board
+ 0
+457.00 285.00
+457.00 298.00
+457.00 298.00
+457.00 298.00
+-1.00 -1.00
+2 3
+10 ECN driver
+ 0
+457.00 563.00
+457.00 576.00
+457.00 576.00
+457.00 576.00
+-1.00 -1.00
+2 3
+11 CONS module
+ 3
+217.00 557.00
+340.00 557.00
+-1.00 -1.00
+4 0
+0
+ 3
+90.00 556.00
+131.00 556.00
+-1.00 -1.00
+4 0
+0
+ 3
+375.00 222.00
+381.00 209.00
+389.00 222.00
+-1.00 -1.00
+5 0
+0
+ 3
+168.00 222.00
+174.00 209.00
+182.00 222.00
+-1.00 -1.00
+5 0
+0
+ 3
+165.00 615.00
+171.00 602.00
+179.00 615.00
+-1.00 -1.00
+5 0
+0
+ 3
+166.00 421.00
+172.00 408.00
+180.00 421.00
+-1.00 -1.00
+5 0
+0
+ 3
+173.00 392.00
+189.00 371.00
+-1.00 -1.00
+1 0
+0
+ 3
+173.00 348.00
+172.00 392.00
+273.00 360.00
+-1.00 -1.00
+1 0
+0
+ 3
+306.00 361.00
+382.00 414.00
+-1.00 -1.00
+1 0
+0
+ 3
+266.00 228.00
+273.00 243.00
+280.00 228.00
+-1.00 -1.00
+5 0
+0
+ 0
+201.00 197.00
+201.00 210.00
+201.00 210.00
+201.00 210.00
+-1.00 -1.00
+2 2
+23 Driver<->Board commands
+ 3
+273.00 246.00
+273.00 214.00
+-1.00 -1.00
+1 0
+0
+ 0
+223.00 295.00
+223.00 308.00
+223.00 308.00
+223.00 308.00
+-1.00 -1.00
+2 2
+15 posted commands
+ 3
+111.00 402.00
+131.00 402.00
+-1.00 -1.00
+5 0
+0
+ 3
+376.00 459.00
+383.00 474.00
+390.00 459.00
+-1.00 -1.00
+5 0
+0
+ 3
+364.00 363.00
+383.00 416.00
+387.00 357.00
+-1.00 -1.00
+1 0
+0
+ 3
+383.00 437.00
+383.00 473.00
+-1.00 -1.00
+1 0
+0
+ 3
+172.00 411.00
+172.00 474.00
+-1.00 -1.00
+1 0
+0
+ 0
+8.00 401.00
+8.00 416.00
+8.00 416.00
+8.00 416.00
+-1.00 -1.00
+1 2
+15 ecn_pending_req
+ 0
+109.00 653.00
+109.00 666.00
+109.00 666.00
+109.00 666.00
+-1.00 -1.00
+2 2
+20 CONS->Driver command
+ 0
+357.00 570.00
+357.00 585.00
+357.00 585.00
+357.00 585.00
+-1.00 -1.00
+1 2
+5 QUEUE
+ 0
+151.00 569.00
+151.00 584.00
+151.00 584.00
+151.00 584.00
+-1.00 -1.00
+1 2
+5 QUEUE
+ 3
+340.00 315.00
+340.00 298.00
+422.00 298.00
+422.00 315.00
+340.00 315.00
+-1.00 -1.00
+6 0
+0
+ 3
+235.00 347.00
+235.00 330.00
+317.00 330.00
+317.00 347.00
+235.00 347.00
+-1.00 -1.00
+6 0
+0
+ 3
+232.00 262.00
+232.00 245.00
+314.00 245.00
+314.00 262.00
+232.00 262.00
+-1.00 -1.00
+6 0
+0
+ 3
+133.00 329.00
+133.00 312.00
+215.00 312.00
+215.00 329.00
+133.00 329.00
+-1.00 -1.00
+6 0
+0
+ 3
+133.00 409.00
+133.00 392.00
+215.00 392.00
+215.00 409.00
+133.00 409.00
+-1.00 -1.00
+6 0
+0
+ 3
+340.00 547.00
+426.00 547.00
+-1.00 -1.00
+5 0
+0
+ 3
+340.00 509.00
+425.00 509.00
+-1.00 -1.00
+5 0
+0
+ 3
+340.00 491.00
+424.00 491.00
+-1.00 -1.00
+5 0
+0
+ 3
+340.00 602.00
+340.00 473.00
+425.00 473.00
+425.00 601.00
+-1.00 -1.00
+6 0
+0
+ 3
+132.00 547.00
+218.00 547.00
+-1.00 -1.00
+5 0
+0
+ 3
+133.00 528.00
+217.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+132.00 509.00
+217.00 509.00
+-1.00 -1.00
+5 0
+0
+ 3
+132.00 491.00
+216.00 491.00
+-1.00 -1.00
+5 0
+0
+ 3
+132.00 602.00
+132.00 473.00
+217.00 473.00
+217.00 601.00
+-1.00 -1.00
+6 0
+0
+ 3
+125.00 410.00
+132.00 402.00
+125.00 395.00
+-1.00 -1.00
+5 0
+0
+ 3
+174.00 211.00
+174.00 312.00
+-1.00 -1.00
+1 0
+0
+ 3
+381.00 210.00
+381.00 298.00
+-1.00 -1.00
+1 0
+0
+ 3
+374.00 282.00
+381.00 297.00
+388.00 282.00
+-1.00 -1.00
+5 0
+0
+ 3
+167.00 297.00
+174.00 312.00
+181.00 297.00
+-1.00 -1.00
+5 0
+0
+ 0
+156.00 583.00
+156.00 598.00
+156.00 598.00
+156.00 598.00
+-1.00 -1.00
+1 2
+6 ecn_if
+ 3
+171.00 604.00
+171.00 648.00
+-1.00 -1.00
+1 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/ecn_queue.nr b/share/doc/iso/wisc/figs/ecn_queue.nr
new file mode 100644
index 0000000..c6c0ce1
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_queue.nr
@@ -0,0 +1,262 @@
+.(z
+.hl
+.br
+.nr g1 2156u
+.nr g2 1727u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D's 4u'\D't 1u'
+.sp -1
+.sp 186u
+\h'617u'\D'l 0u -167u'
+.sp -1
+.ft R
+.ps 7
+.nr g8 \n(.d
+.ds g9 "ecn_if
+.sp 80u
+\h'561u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 1162u
+\h'602u'\D'l 27u -56u'\D'l 26u 56u'
+.sp -1
+.sp 57u
+\h'1387u'\D'l 26u -57u'\D'l 27u 57u'
+.sp -1
+\D's 4u'
+.sp -1
+.sp 273u
+\h'1413u'\D'l 0u -333u'
+.sp -1
+.sp -4u
+\h'629u'\D'l 0u -382u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -754u
+\h'443u'\D'l 27u 31u'\D'l -27u 26u'
+.sp -1
+\D't 3u'
+.sp -1
+.sp -726u
+\h'470u'\D'l 0u 487u'\D'l 322u 0u'\D'l 0u -484u'
+.sp -1
+\D't 1u'
+.sp -1
+.sp 419u
+\h'470u'\D'l 318u 0u'
+.sp -1
+.sp -68u
+\h'470u'\D'l 322u 0u'
+.sp -1
+.sp -71u
+\h'473u'\D'l 319u 0u'
+.sp -1
+.sp -72u
+\h'470u'\D'l 326u 0u'
+.sp -1
+\D't 3u'
+.sp -1
+.sp -208u
+\h'1258u'\D'l 0u 487u'\D'l 322u 0u'\D'l 0u -484u'
+.sp -1
+\D't 1u'
+.sp -1
+.sp 419u
+\h'1258u'\D'l 318u 0u'
+.sp -1
+.sp -68u
+\h'1258u'\D'l 322u 0u'
+.sp -1
+.sp -143u
+\h'1258u'\D'l 326u 0u'
+.sp -1
+\D't 3u'
+.sp -1
+.sp 522u
+\h'473u'\D'l 0u 64u'\D'l 311u 0u'\D'l 0u -64u'\D'l -311u 0u'
+.sp -1
+.sp 303u
+\h'473u'\D'l 0u 65u'\D'l 311u 0u'\D'l 0u -65u'\D'l -311u 0u'
+.sp -1
+.sp 254u
+\h'849u'\D'l 0u 64u'\D'l 310u 0u'\D'l 0u -64u'\D'l -310u 0u'
+.sp -1
+.sp -322u
+\h'860u'\D'l 0u 64u'\D'l 311u 0u'\D'l 0u -64u'\D'l -311u 0u'
+.sp -1
+.sp 121u
+\h'1258u'\D'l 0u 65u'\D'l 311u 0u'\D'l 0u -65u'\D'l -311u 0u'
+.sp -1
+.ft R
+.ps 7
+.nr g8 \n(.d
+.ds g9 "QUEUE
+.sp -961u
+\h'542u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 7
+.nr g8 \n(.d
+.ds g9 "QUEUE
+.sp -965u
+\h'1322u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 7
+.nr g8 \n(.d
+.ds g9 "CONS->Driver command
+.sp -1280u
+\h'383u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 7
+.nr g8 \n(.d
+.ds g9 "ecn_pending_req
+.sp -326u
+\&\*(g9
+.sp |\n(g8u
+\D's 4u'\D't 1u'
+.sp -1
+.sp -364u
+\h'621u'\D'l 0u -238u'
+.sp -1
+.sp -98u
+\h'1421u'\D'l 0u -137u'
+.sp -1
+.sp 280u
+\h'1349u'\D'l 72u -201u'\D'l 15u 224u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -363u
+\h'1394u'\D'l 27u -57u'\D'l 26u 57u'
+.sp -1
+.sp 216u
+\h'390u'\D'l 76u 0u'
+.sp -1
+.ft I
+.ps 7
+.nr g8 \n(.d
+.ds g9 "posted commands
+.sp 405u
+\h'815u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'
+.sp -1
+.sp 591u
+\h'1004u'\D'l 0u 121u'
+.sp -1
+.ft I
+.ps 7
+.nr g8 \n(.d
+.ds g9 "Driver<->Board commands
+.sp 185u
+\h'731u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 68u
+\h'977u'\D'l 27u -57u'\D'l 27u 57u'
+.sp -1
+\D's 4u'
+.sp -1
+.sp -504u
+\h'1129u'\D'l 288u -201u'
+.sp -1
+.sp 49u
+\h'625u'\D'l -4u -167u'\D'l 383u 122u'
+.sp -1
+.sp -167u
+\h'625u'\D'l 61u 80u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -109u
+\h'599u'\D'l 22u 49u'\D'l 31u -49u'
+.sp -1
+.sp -735u
+\h'595u'\D'l 22u 50u'\D'l 31u -50u'
+.sp -1
+.sp 1489u
+\h'606u'\D'l 23u 49u'\D'l 30u -49u'
+.sp -1
+\h'1391u'\D'l 22u 49u'\D'l 31u -49u'
+.sp -1
+\D's 16u'
+.sp -1
+.sp -1265u
+\h'311u'\D'l 155u 0u'
+.sp -1
+.sp -4u
+\h'792u'\D'l 466u 0u'
+.sp -1
+.ft I
+.ps 9
+.nr g8 \n(.d
+.ds g9 "CONS module
+.sp -23u
+\h'1701u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 9
+.nr g8 \n(.d
+.ds g9 "ECN driver
+.sp 1030u
+\h'1701u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 9
+.nr g8 \n(.d
+.ds g9 "EICON X.25 board
+.sp 1151u
+\h'1701u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 7
+.nr g8 \n(.d
+.ds g9 "x25intrq
+.sp -102u
+\h'1319u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 7
+.nr g8 \n(.d
+.ds g9 "Driver->CONS replies
+.sp 522u
+\h'1186u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 9
+.nr g8 \n(.d
+.ds g9 "ECN driver
+.sp 80u
+\h'1701u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 110u
+\h'1262u'\D'l 318u 0u'
+.sp -1
+\D's 16u'
+.sp -1
+.sp -110u
+\h'1580u'\D'l 576u 0u'
+.sp -1
+.sp 1056u
+\h'341u'\D'l 1758u 0u'
+.sp -1
+.sp 307u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fBFigure \n+(FG:\fR Queue Placement Strategy
+.)z
diff --git a/share/doc/iso/wisc/figs/ecn_vc.grn b/share/doc/iso/wisc/figs/ecn_vc.grn
new file mode 100644
index 0000000..b1c93ed
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_vc.grn
@@ -0,0 +1,19 @@
+.(z
+.hl
+.GS C
+width 6.0
+high 4.0
+1 8
+2 10
+3 12
+4 14
+sc 0.5
+narrow 1
+medium 3
+thick 7
+pointscale off
+file ecn_vc.gsrc
+.GE
+.ce
+\fBFigure \n+(FG:\fR Virtual Circuit State Diagram
+.)z
diff --git a/share/doc/iso/wisc/figs/ecn_vc.gsrc b/share/doc/iso/wisc/figs/ecn_vc.gsrc
new file mode 100644
index 0000000..9364ced
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_vc.gsrc
@@ -0,0 +1,273 @@
+gremlinfile
+0 184.00 269.00
+0
+184.00 431.00
+184.00 441.00
+184.00 441.00
+184.00 441.00
+-1.00 -1.00
+2 1
+23 SEND/RECEIVE completion
+ 0
+110.00 358.00
+110.00 368.00
+110.00 368.00
+110.00 368.00
+-1.00 -1.00
+2 1
+10 completion
+ 0
+99.00 368.00
+99.00 378.00
+99.00 378.00
+99.00 378.00
+-1.00 -1.00
+2 1
+11 CLEAR/ABORT
+ 0
+366.00 360.00
+366.00 370.00
+366.00 370.00
+366.00 370.00
+-1.00 -1.00
+2 1
+6 issued
+ 0
+359.00 373.00
+359.00 383.00
+359.00 383.00
+359.00 383.00
+-1.00 -1.00
+2 1
+11 CLEAR/ABORT
+ 0
+210.00 445.00
+210.00 455.00
+210.00 455.00
+210.00 455.00
+-1.00 -1.00
+2 1
+9 0x0a from
+ 0
+206.00 495.00
+206.00 505.00
+206.00 505.00
+206.00 505.00
+-1.00 -1.00
+2 1
+22 CALL/LISTEN completion
+ 0
+264.00 523.00
+264.00 533.00
+264.00 533.00
+264.00 533.00
+-1.00 -1.00
+2 1
+10 completion
+ 0
+240.00 533.00
+240.00 543.00
+240.00 543.00
+240.00 543.00
+-1.00 -1.00
+2 1
+13 RECEIVE/RESET
+ 0
+379.00 575.00
+379.00 585.00
+379.00 585.00
+379.00 585.00
+-1.00 -1.00
+2 1
+10 completion
+ 0
+345.00 589.00
+345.00 599.00
+345.00 599.00
+345.00 599.00
+-1.00 -1.00
+2 1
+22 0x18 from SEND/RECEIVE
+ 0
+394.00 602.00
+394.00 612.00
+394.00 612.00
+394.00 612.00
+-1.00 -1.00
+2 1
+6 - or -
+ 0
+367.00 613.00
+367.00 623.00
+367.00 623.00
+367.00 623.00
+-1.00 -1.00
+2 1
+12 RESET issued
+ 3
+319.00 359.00
+311.00 340.00
+329.00 349.00
+-1.00 -1.00
+5 0
+0
+ 3
+361.00 520.00
+367.00 503.00
+351.00 508.00
+-1.00 -1.00
+5 0
+0
+ 3
+143.00 391.00
+138.00 409.00
+154.00 401.00
+-1.00 -1.00
+5 0
+0
+ 3
+328.00 582.00
+323.00 600.00
+339.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+163.00 420.00
+152.00 427.00
+163.00 435.00
+-1.00 -1.00
+5 0
+0
+ 3
+350.00 485.00
+361.00 492.00
+350.00 500.00
+-1.00 -1.00
+5 0
+0
+ 3
+312.00 341.00
+375.00 407.00
+-1.00 -1.00
+4 0
+0
+ 3
+135.00 410.00
+202.00 343.00
+-1.00 -1.00
+4 0
+0
+ 3
+320.00 601.00
+408.00 529.00
+-1.00 -1.00
+4 0
+0
+ 3
+293.00 563.00
+368.00 503.00
+-1.00 -1.00
+4 0
+0
+ 3
+150.00 428.00
+358.00 428.00
+-1.00 -1.00
+4 0
+0
+ 3
+148.00 492.00
+363.00 492.00
+-1.00 -1.00
+4 0
+0
+ 0
+226.00 309.00
+226.00 319.00
+226.00 319.00
+226.00 319.00
+-1.00 -1.00
+1 1
+8 VC_CLEAR
+ 0
+372.00 455.00
+372.00 465.00
+372.00 465.00
+372.00 465.00
+-1.00 -1.00
+1 1
+12 VC_DATA_XFER
+ 0
+25.00 458.00
+25.00 468.00
+25.00 468.00
+25.00 468.00
+-1.00 -1.00
+1 1
+16 VC_NO_CONNECTION
+ 0
+218.00 293.00
+218.00 303.00
+218.00 303.00
+218.00 303.00
+-1.00 -1.00
+1 1
+11 IN_PROGRESS
+ 0
+222.00 618.00
+222.00 628.00
+222.00 628.00
+222.00 628.00
+-1.00 -1.00
+1 1
+11 IN_PROGRESS
+ 0
+228.00 634.00
+228.00 644.00
+228.00 644.00
+228.00 644.00
+-1.00 -1.00
+1 1
+8 VC_RESET
+ 4
+423.00 459.00
+429.00 390.00
+423.00 528.26
+423.00 389.74
+353.74 459.00
+492.26 459.00
+-1.00 -1.00
+5 0
+0
+ 4
+89.00 459.00
+83.00 390.00
+89.00 528.26
+89.00 389.74
+158.26 459.00
+19.74 459.00
+-1.00 -1.00
+5 0
+0
+ 4
+256.00 299.00
+250.00 230.00
+256.00 368.26
+256.00 229.74
+325.26 299.00
+186.74 299.00
+-1.00 -1.00
+5 0
+0
+ 4
+256.00 621.00
+250.00 690.00
+256.00 551.74
+256.00 690.26
+325.26 621.00
+186.74 621.00
+-1.00 -1.00
+5 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/ecn_vc.nr b/share/doc/iso/wisc/figs/ecn_vc.nr
new file mode 100644
index 0000000..ca2cec5
--- /dev/null
+++ b/share/doc/iso/wisc/figs/ecn_vc.nr
@@ -0,0 +1,205 @@
+.(z
+.hl
+.br
+.nr g1 2364u
+.nr g2 2303u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D't 1u'
+.sp -1
+.sp 346u
+\h'836u'\D'c 693u'
+.sp -1
+.sp 1610u
+\h'836u'\D'c 693u'
+.sp -1
+.sp -800u
+\D'c 693u'
+.sp -1
+\h'1671u'\D'c 693u'
+.sp -1
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "VC_RESET
+.sp -875u
+\h'1042u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "IN_PROGRESS
+.sp -795u
+\h'1012u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "IN_PROGRESS
+.sp 830u
+\h'992u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "VC_NO_CONNECTION
+.sp 5u
+\h'27u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "VC_DATA_XFER
+.sp 20u
+\h'1763u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 8
+.nr g8 \n(.d
+.ds g9 "VC_CLEAR
+.sp 750u
+\h'1032u'\&\*(g9
+.sp |\n(g8u
+\D's 16u'
+.sp -1
+.sp -165u
+\h'642u'\D'l 1076u 0u'
+.sp -1
+.sp 320u
+\h'652u'\D'l 1041u 0u'
+.sp -1
+.sp -675u
+\h'1367u'\D'l 376u 300u'
+.sp -1
+.sp -190u
+\h'1502u'\D'l 441u 360u'
+.sp -1
+.sp 955u
+\h'577u'\D'l 335u 335u'
+.sp -1
+.sp 345u
+\h'1462u'\D'l 316u -330u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -720u
+\h'1653u'\D'l 55u -35u'\D'l -55u -40u'
+.sp -1
+.sp 325u
+\h'717u'\D'l -55u -35u'\D'l 55u -40u'
+.sp -1
+.sp -810u
+\h'1542u'\D'l -25u -90u'\D'l 81u 40u'
+.sp -1
+.sp 955u
+\h'617u'\D'l -25u -90u'\D'l 80u 40u'
+.sp -1
+.sp -645u
+\h'1708u'\D'l 30u 85u'\D'l -80u -25u'
+.sp -1
+.sp 805u
+\h'1497u'\D'l -40u 95u'\D'l 91u -45u'
+.sp -1
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "RESET issued
+.sp -1270u
+\h'1738u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "- or -
+.sp -1215u
+\h'1873u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "0x18 from SEND/RECEIVE
+.sp -1150u
+\h'1628u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "completion
+.sp -1080u
+\h'1798u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "RECEIVE/RESET
+.sp -870u
+\h'1102u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "completion
+.sp -820u
+\h'1222u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "CALL/LISTEN completion
+.sp -680u
+\h'932u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "0x0a from
+.sp -430u
+\h'952u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "CLEAR/ABORT
+.sp -70u
+\h'1698u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "issued
+.sp -5u
+\h'1733u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "CLEAR/ABORT
+.sp -45u
+\h'397u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "completion
+.sp 5u
+\h'452u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 8
+.nr g8 \n(.d
+.ds g9 "SEND/RECEIVE completion
+.sp -360u
+\h'822u'\&\*(g9
+.sp |\n(g8u
+.sp 647u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fBFigure \n+(FG:\fR Virtual Circuit State Diagram
+.)z
diff --git a/share/doc/iso/wisc/figs/func_units.grn b/share/doc/iso/wisc/figs/func_units.grn
new file mode 100644
index 0000000..3c98567
--- /dev/null
+++ b/share/doc/iso/wisc/figs/func_units.grn
@@ -0,0 +1,18 @@
+.(z L
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.3
+narrow 1
+medium 3
+thick 7
+pointscale off
+file func_units.gsrc
+.GE
+.ce
+\fBFigure \n+(FG:\fR The major functional units of Unix 4.2A
+.)z
diff --git a/share/doc/iso/wisc/figs/func_units.gsrc b/share/doc/iso/wisc/figs/func_units.gsrc
new file mode 100644
index 0000000..29edd83
--- /dev/null
+++ b/share/doc/iso/wisc/figs/func_units.gsrc
@@ -0,0 +1,603 @@
+gremlinfile
+0 80.00 208.00
+0
+80.00 580.00
+80.00 596.00
+80.00 596.00
+80.00 596.00
+-1.00 -1.00
+2 3
+6 kernel
+ 0
+80.00 628.00
+80.00 644.00
+80.00 644.00
+80.00 644.00
+-1.00 -1.00
+2 3
+4 user
+ 3
+144.00 212.00
+141.00 217.00
+144.00 215.00
+147.00 217.00
+144.00 212.00
+-1.00 -1.00
+6 0
+0
+ 3
+184.00 340.00
+189.66 338.59
+186.12 337.88
+185.41 334.34
+184.00 340.00
+-1.00 -1.00
+6 0
+0
+ 3
+264.00 284.00
+258.34 285.41
+261.88 286.12
+262.59 289.66
+264.00 284.00
+-1.00 -1.00
+6 0
+0
+ 3
+320.00 324.00
+317.00 329.00
+320.00 327.00
+323.00 329.00
+320.00 324.00
+-1.00 -1.00
+6 0
+0
+ 3
+312.00 356.00
+315.00 351.00
+312.00 353.00
+309.00 351.00
+312.00 356.00
+-1.00 -1.00
+6 0
+0
+ 3
+536.00 548.00
+540.43 544.21
+536.95 545.15
+534.74 542.31
+536.00 548.00
+-1.00 -1.00
+6 0
+0
+ 3
+576.00 388.00
+573.00 393.00
+576.00 391.00
+579.00 393.00
+576.00 388.00
+-1.00 -1.00
+6 0
+0
+ 3
+544.00 380.00
+538.34 381.41
+541.88 382.12
+542.59 385.66
+544.00 380.00
+-1.00 -1.00
+6 0
+0
+ 3
+544.00 380.00
+541.00 385.00
+544.00 383.00
+547.00 385.00
+544.00 380.00
+-1.00 -1.00
+6 0
+0
+ 3
+560.00 292.00
+563.00 287.00
+560.00 289.00
+557.00 287.00
+560.00 292.00
+-1.00 -1.00
+6 0
+0
+ 3
+488.00 156.00
+487.55 161.81
+489.34 158.68
+492.92 159.13
+488.00 156.00
+-1.00 -1.00
+6 0
+0
+ 3
+456.00 164.00
+453.00 169.00
+456.00 167.00
+459.00 169.00
+456.00 164.00
+-1.00 -1.00
+6 0
+0
+ 3
+384.00 308.00
+385.41 313.66
+386.12 310.12
+389.66 309.41
+384.00 308.00
+-1.00 -1.00
+6 0
+0
+ 3
+360.00 420.00
+365.00 423.00
+363.00 420.00
+365.00 417.00
+360.00 420.00
+-1.00 -1.00
+6 0
+0
+ 3
+344.00 444.00
+345.41 449.66
+346.12 446.12
+349.66 445.41
+344.00 444.00
+-1.00 -1.00
+6 0
+0
+ 3
+456.00 556.00
+456.45 550.19
+454.66 553.32
+451.08 552.87
+456.00 556.00
+-1.00 -1.00
+6 0
+0
+ 3
+272.00 436.00
+266.34 437.41
+269.88 438.12
+270.59 441.66
+272.00 436.00
+-1.00 -1.00
+6 0
+0
+ 3
+184.00 404.00
+185.41 409.66
+186.12 406.12
+189.66 405.41
+184.00 404.00
+-1.00 -1.00
+6 0
+0
+ 3
+432.00 652.00
+427.00 649.00
+429.00 652.00
+427.00 655.00
+432.00 652.00
+-1.00 -1.00
+6 0
+0
+ 3
+352.00 676.00
+357.81 676.45
+354.68 674.66
+355.13 671.08
+352.00 676.00
+-1.00 -1.00
+6 0
+0
+ 3
+200.00 500.00
+205.66 498.59
+202.12 497.88
+201.41 494.34
+200.00 500.00
+-1.00 -1.00
+6 0
+0
+ 3
+208.00 548.00
+213.00 551.00
+211.00 548.00
+213.00 545.00
+208.00 548.00
+-1.00 -1.00
+6 0
+0
+ 3
+192.00 572.00
+193.41 577.66
+194.12 574.12
+197.66 573.41
+192.00 572.00
+-1.00 -1.00
+6 0
+0
+ 3
+272.00 660.00
+272.45 654.19
+270.66 657.32
+267.08 656.87
+272.00 660.00
+-1.00 -1.00
+6 0
+0
+ 3
+456.00 556.00
+344.00 444.00
+-1.00 -1.00
+6 0
+0
+ 3
+544.00 380.00
+520.00 412.00
+-1.00 -1.00
+6 0
+0
+ 3
+576.00 388.00
+536.00 548.00
+-1.00 -1.00
+6 0
+0
+ 3
+560.00 292.00
+488.00 156.00
+-1.00 -1.00
+6 0
+0
+ 3
+424.00 612.00
+568.00 612.00
+-1.00 -1.00
+2 0
+0
+ 0
+424.00 68.00
+424.00 84.00
+424.00 84.00
+424.00 84.00
+-1.00 -1.00
+3 3
+7 drivers
+ 0
+424.00 92.00
+424.00 108.00
+424.00 108.00
+424.00 108.00
+-1.00 -1.00
+3 3
+9 interface
+ 3
+480.00 404.00
+456.00 164.00
+-1.00 -1.00
+6 0
+0
+ 3
+464.00 412.00
+384.00 308.00
+-1.00 -1.00
+6 0
+0
+ 3
+448.00 436.00
+360.00 420.00
+-1.00 -1.00
+6 0
+0
+ 3
+312.00 356.00
+320.00 324.00
+-1.00 -1.00
+6 0
+0
+ 3
+200.00 500.00
+272.00 436.00
+-1.00 -1.00
+6 0
+0
+ 3
+184.00 340.00
+264.00 284.00
+-1.00 -1.00
+6 0
+0
+ 3
+144.00 324.00
+144.00 212.00
+-1.00 -1.00
+6 0
+0
+ 3
+440.00 564.00
+176.00 404.00
+-1.00 -1.00
+6 0
+0
+ 3
+424.00 588.00
+208.00 548.00
+-1.00 -1.00
+6 0
+0
+ 3
+352.00 676.00
+432.00 652.00
+-1.00 -1.00
+6 0
+0
+ 3
+272.00 660.00
+192.00 572.00
+-1.00 -1.00
+6 0
+0
+ 0
+120.00 132.00
+120.00 148.00
+120.00 148.00
+120.00 148.00
+-1.00 -1.00
+3 3
+7 drivers
+ 0
+120.00 156.00
+120.00 172.00
+120.00 172.00
+120.00 172.00
+-1.00 -1.00
+3 3
+6 device
+ 0
+120.00 180.00
+120.00 196.00
+120.00 196.00
+120.00 196.00
+-1.00 -1.00
+3 3
+7 blocked
+ 0
+424.00 116.00
+424.00 132.00
+424.00 132.00
+424.00 132.00
+-1.00 -1.00
+3 3
+8 network
+ 0
+560.00 332.00
+560.00 348.00
+560.00 348.00
+560.00 348.00
+-1.00 -1.00
+3 3
+3 IPC
+ 0
+304.00 212.00
+304.00 228.00
+304.00 228.00
+304.00 228.00
+-1.00 -1.00
+3 3
+7 support
+ 0
+304.00 236.00
+304.00 252.00
+304.00 252.00
+304.00 252.00
+-1.00 -1.00
+3 3
+6 memory
+ 0
+304.00 260.00
+304.00 276.00
+304.00 276.00
+304.00 276.00
+-1.00 -1.00
+3 3
+7 virtual
+ 0
+128.00 356.00
+128.00 372.00
+128.00 372.00
+128.00 372.00
+-1.00 -1.00
+3 3
+6 system
+ 0
+128.00 380.00
+128.00 396.00
+128.00 396.00
+128.00 396.00
+-1.00 -1.00
+3 3
+4 file
+ 0
+480.00 452.00
+480.00 468.00
+480.00 468.00
+480.00 468.00
+-1.00 -1.00
+3 3
+5 clock
+ 0
+288.00 380.00
+288.00 396.00
+288.00 396.00
+288.00 396.00
+-1.00 -1.00
+3 3
+7 support
+ 0
+288.00 404.00
+288.00 420.00
+288.00 420.00
+288.00 420.00
+-1.00 -1.00
+3 3
+7 process
+ 0
+448.00 572.00
+448.00 588.00
+448.00 588.00
+448.00 588.00
+-1.00 -1.00
+3 3
+12 system calls
+ 0
+456.00 628.00
+456.00 644.00
+456.00 644.00
+456.00 644.00
+-1.00 -1.00
+3 3
+9 C library
+ 0
+288.00 692.00
+288.00 708.00
+288.00 708.00
+288.00 708.00
+-1.00 -1.00
+3 3
+4 user
+ 0
+272.00 676.00
+272.00 692.00
+272.00 692.00
+272.00 692.00
+-1.00 -1.00
+3 3
+7 program
+ 0
+144.00 516.00
+144.00 532.00
+144.00 532.00
+144.00 532.00
+-1.00 -1.00
+3 3
+3 tty
+ 3
+568.00 612.00
+640.00 612.00
+-1.00 -1.00
+6 0
+0
+ 3
+424.00 612.00
+64.00 612.00
+-1.00 -1.00
+6 0
+0
+ 4
+496.00 612.00
+496.00 684.00
+496.00 540.00
+496.00 684.00
+568.00 612.00
+424.00 612.00
+-1.00 -1.00
+6 0
+0
+ 4
+456.00 100.00
+456.00 164.00
+456.00 36.00
+456.00 164.00
+520.00 100.00
+392.00 100.00
+-1.00 -1.00
+6 0
+0
+ 4
+336.00 244.00
+336.00 324.00
+336.00 164.00
+336.00 324.00
+416.00 244.00
+256.00 244.00
+-1.00 -1.00
+6 0
+0
+ 4
+144.00 164.00
+144.00 212.00
+144.00 116.00
+144.00 212.00
+192.00 164.00
+96.00 164.00
+-1.00 -1.00
+6 0
+0
+ 4
+144.00 372.00
+144.00 420.00
+144.00 324.00
+144.00 420.00
+192.00 372.00
+96.00 372.00
+-1.00 -1.00
+6 0
+0
+ 4
+576.00 340.00
+576.00 388.00
+576.00 292.00
+576.00 388.00
+624.00 340.00
+528.00 340.00
+-1.00 -1.00
+6 0
+0
+ 4
+496.00 452.00
+496.00 500.00
+496.00 404.00
+496.00 500.00
+544.00 452.00
+448.00 452.00
+-1.00 -1.00
+6 0
+0
+ 4
+312.00 404.00
+312.00 452.00
+312.00 356.00
+312.00 452.00
+360.00 404.00
+264.00 404.00
+-1.00 -1.00
+6 0
+0
+ 4
+160.00 532.00
+160.00 580.00
+160.00 484.00
+160.00 580.00
+208.00 532.00
+112.00 532.00
+-1.00 -1.00
+6 0
+0
+ 4
+304.00 692.00
+304.00 740.00
+304.00 644.00
+304.00 740.00
+352.00 692.00
+256.00 692.00
+-1.00 -1.00
+6 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/func_units.nr b/share/doc/iso/wisc/figs/func_units.nr
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/share/doc/iso/wisc/figs/func_units.nr
diff --git a/share/doc/iso/wisc/figs/link_to_CONS_primitives.NR.DONT_REMOVE b/share/doc/iso/wisc/figs/link_to_CONS_primitives.NR.DONT_REMOVE
new file mode 100644
index 0000000..16dc3e0
--- /dev/null
+++ b/share/doc/iso/wisc/figs/link_to_CONS_primitives.NR.DONT_REMOVE
@@ -0,0 +1,77 @@
+.(b
+.TS
+tab(+) center expand box;
+c c
+a | a .
+service primitive & arguments+provided by
+=
+N_CONNECT.request+cons_openvc(... faddr, ...)
+called address+argument faddr
+calling address+not implemented
+receipt confirmation+not implemented
+expedited data+not implemented
+quality of service+not implemented
+NS-user data+not implemented
+_
+N_CONNECT.indication+not implemented
+_
+N_CONNECT.response+cons_netcmd( CONN_REFUSE )
++ or cons_netcmd( CONN_CONFIRM )
++ however, net connection has already
++ been accepted. If REFUSE, it will
++ be cleared with E_CO_HLI_REJT
++ (higher layer rejects connection)
+responding address+not implemented
+receipt confirmation+not implemented
+expedited data+not implemented
+quality of service+not implemented
+NS-user data+not implemented
+_
+N_CONNECT.confirm+not implemented
+=
+N_DATA.request+cons_output(... m, ...)
++and cosns_output(... m, ...)
+confirmation+not implemented
+data+mbuf chain m
+_
+N_DATA.indication+pr_input( m, ... )
++or software interrupt
+confirmation+not implemented
+data+mbuf chain
+_
+N_DATA_ACKNOWLEDGE.request+not implemented
+_
+N_DATA_ACKNOWLEDGE.indication+not implemented
+_
+N_EXPEDITED_DATA.request+not implemented
+_
+N_EXPEDITED_DATA.indication+not implemented
+=
+N_RESET.request+not implemented
+N_RESET.indication+socket->so_error = reason
++or pr_ctlinput( PRC_ROUTEDEAD )
+originator+not implemented
+reason+from X.25 packet or ecn driver
+N_RESET.response+not implemented
+N_RESET.confirm+not implemented
+=
+N_DISCONNECT.request+cons_netcmd( CONN_CLOSE )
+reason+uses E_CO_HLI_DISCN (normal
++disconnect from higher layer)
+responding address+not implemented
+NS-user data+not implemented
+_
+N_DISCONNECT.indication+socket->so_error = reason
++or pr_ctlinput( PRC_ROUTEDEAD )
+originator+not implemented
+reason+from X.25 packet or ecn driver
+responding address+not implemented
+NS-user data+not implemented
+.TE
+.(c
+\fBFigure \n+(FG\fR: Connection-Oriented Network Service Primitives
+.)c
+.)b
+.(f
+\** data on disconnect is not supported at this time.
+.)f
diff --git a/share/doc/iso/wisc/figs/link_to_TS_primitives.NR.DONT_REMOVE b/share/doc/iso/wisc/figs/link_to_TS_primitives.NR.DONT_REMOVE
new file mode 100644
index 0000000..3d27df3
--- /dev/null
+++ b/share/doc/iso/wisc/figs/link_to_TS_primitives.NR.DONT_REMOVE
@@ -0,0 +1,60 @@
+.(b
+.TS
+center expand box;
+c c
+a | a .
+service primitive & arguments Unix system calls & arguments
+=
+T_CONNECT.request \fIsocket(), connect(), setsockopt()\fR
+called address \fIconnect()\fR argument
+calling address \fIconnect()\fR argument
+quality of service not implemented
+buffer management \fIsetsockopt()\fR argument
+security not implemented
+data \fIsetsockopt(), getsockopt()\fR
+_
+T_CONNECT.indication return from \fIaccept(); getsockopt()\fR
+called address \fIaccept()\fR argument
+calling address \fIaccept()\fR argument
+quality of service not implemented
+security not implemented
+data \fIsetsockopt(), getsockopt()\fR
+_
+T_CONNECT.response no applicable system calls
+_
+T_CONNECT.confirm return from \fIconnect()\fR
+quality of service \fIgetsockopt()\fR argument
+data \fIsetsockopt(), getsockopt()\fR
+=
+T_DATA.request \fIrecvv(), sendv()\fR
+_
+T_DATA.indication return from \fIrecvv()\fR, \fIsendv()\fR, or \fIselect()\fR;
+ or signal SIGIO
+ ioctl(FIONREAD) tells how much has been
+ queued to read
+=
+T_EXPEDITED_DATA.request \fIsendv()\fR with MSG_OOB flag
+_
+T_EXPEDITED_DATA.indication SIGURG, \fIgetsockopt()\fR with TPFLAG_XPD,
+ return from \fIselect()\fR with exceptional
+ conditions mask
+=
+T_DISCONNECT.request \fIclose()\fR
+data \fIsetsockopt()\fR
+_
+T_DISCONNECT.indication SIGURG,
+ error return on other primitives
+reason errno
+data \fIgetsockopt()\**\fR
+=
+T_STATUS.request \fIgetsockopt()\fR, \fItpstat\fR utility program
+_
+T_STATUS.indication \fIgetsockopt()\fR, \fIselect()\fR, \fItpstat\fR
+.TE
+.(c
+\fBFigure \n+(FG\fR: Transport Service Primitives
+.)c
+.)b
+.(f
+\** data on disconnect is not supported at this time.
+.)f
diff --git a/share/doc/iso/wisc/figs/mbufrcv.grn b/share/doc/iso/wisc/figs/mbufrcv.grn
new file mode 100644
index 0000000..0f3fa52
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufrcv.grn
@@ -0,0 +1,13 @@
+.(z
+.GS C
+width 5.0
+high 6.0
+narrow 1
+medium 3
+thick 7
+pointscale on
+file mbufrcv.gsrc
+.GE
+.ce
+\fB Figure \n+(FG\fR: \fImbuf\fR chains on socket receive buffer
+.)z
diff --git a/share/doc/iso/wisc/figs/mbufrcv.gsrc b/share/doc/iso/wisc/figs/mbufrcv.gsrc
new file mode 100644
index 0000000..1577804
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufrcv.gsrc
@@ -0,0 +1,1006 @@
+gremlinfile
+0 328.00 496.00
+0
+328.00 224.00
+328.00 234.00
+328.00 234.00
+328.00 234.00
+-1.00 -1.00
+1 1
+7 MT_DATA
+ 0
+328.00 400.00
+328.00 410.00
+328.00 410.00
+328.00 410.00
+-1.00 -1.00
+1 1
+7 MT_DATA
+ 0
+328.00 576.00
+328.00 586.00
+328.00 586.00
+328.00 586.00
+-1.00 -1.00
+1 1
+7 MT_DATA
+ 0
+72.00 576.00
+72.00 586.00
+72.00 586.00
+72.00 586.00
+-1.00 -1.00
+1 1
+7 MT_DATA
+ 3
+384.00 256.00
+416.00 256.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 272.00
+416.00 240.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 272.00
+432.00 240.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 240.00
+432.00 272.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 416.00
+432.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 448.00
+432.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 448.00
+416.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 432.00
+416.00 432.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 432.00
+160.00 432.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 448.00
+160.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+176.00 448.00
+176.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 416.00
+176.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 512.00
+288.00 512.00
+-1.00 -1.00
+5 0
+0
+ 3
+512.00 528.00
+512.00 496.00
+528.00 528.00
+528.00 496.00
+-1.00 -1.00
+5 0
+0
+ 3
+496.00 512.00
+512.00 512.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 320.00
+176.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+176.00 352.00
+176.00 320.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 352.00
+160.00 320.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 336.00
+160.00 336.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 144.00
+432.00 176.00
+-1.00 -1.00
+5 0
+0
+ 3
+512.00 592.00
+528.00 624.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 496.00
+304.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 176.00
+432.00 144.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 176.00
+416.00 144.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 160.00
+416.00 160.00
+-1.00 -1.00
+5 0
+0
+ 3
+304.00 528.00
+304.00 496.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 528.00
+288.00 496.00
+-1.00 -1.00
+5 0
+0
+ 3
+528.00 624.00
+528.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+512.00 624.00
+512.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+496.00 608.00
+512.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 272.00
+349.00 277.00
+352.00 275.00
+355.00 277.00
+352.00 272.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 288.00
+352.00 272.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 288.00
+352.00 288.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 336.00
+416.00 288.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 336.00
+416.00 336.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 448.00
+349.00 453.00
+352.00 451.00
+355.00 453.00
+352.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 464.00
+352.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 464.00
+352.00 464.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 512.00
+416.00 464.00
+-1.00 -1.00
+5 0
+0
+ 3
+96.00 448.00
+93.00 453.00
+96.00 451.00
+99.00 453.00
+96.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+96.00 464.00
+96.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 464.00
+96.00 464.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 512.00
+160.00 464.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 512.00
+416.00 512.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 512.00
+160.00 512.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 608.00
+427.00 605.00
+429.00 608.00
+427.00 611.00
+432.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 608.00
+432.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 608.00
+315.00 605.00
+317.00 608.00
+315.00 611.00
+320.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+192.00 608.00
+187.00 605.00
+189.00 608.00
+187.00 611.00
+192.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 608.00
+320.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+128.00 608.00
+192.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 624.00
+432.00 496.00
+496.00 496.00
+496.00 624.00
+432.00 624.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 592.00
+496.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+432.00 528.00
+432.00 528.00
+496.00 528.00
+496.00 528.00
+432.00 528.00
+-1.00 -1.00
+5 0
+0
+ 0
+448.00 512.00
+448.00 522.00
+448.00 522.00
+448.00 522.00
+-1.00 -1.00
+1 1
+6 m_next
+ 0
+448.00 608.00
+448.00 618.00
+448.00 618.00
+448.00 618.00
+-1.00 -1.00
+1 1
+5 m_act
+ 3
+432.00 576.00
+432.00 576.00
+496.00 576.00
+496.00 576.00
+432.00 576.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 224.00
+320.00 224.00
+384.00 224.00
+384.00 224.00
+320.00 224.00
+-1.00 -1.00
+5 0
+0
+ 0
+336.00 256.00
+336.00 266.00
+336.00 266.00
+336.00 266.00
+-1.00 -1.00
+1 1
+5 m_act
+ 0
+336.00 160.00
+336.00 170.00
+336.00 170.00
+336.00 170.00
+-1.00 -1.00
+1 1
+6 m_next
+ 3
+320.00 176.00
+320.00 176.00
+384.00 176.00
+384.00 176.00
+320.00 176.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 240.00
+384.00 240.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 272.00
+320.00 144.00
+384.00 144.00
+384.00 272.00
+320.00 272.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 448.00
+320.00 320.00
+384.00 320.00
+384.00 448.00
+320.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 416.00
+384.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 352.00
+320.00 352.00
+384.00 352.00
+384.00 352.00
+320.00 352.00
+-1.00 -1.00
+5 0
+0
+ 0
+336.00 336.00
+336.00 346.00
+336.00 346.00
+336.00 346.00
+-1.00 -1.00
+1 1
+6 m_next
+ 0
+336.00 432.00
+336.00 442.00
+336.00 442.00
+336.00 442.00
+-1.00 -1.00
+1 1
+5 m_act
+ 3
+320.00 400.00
+320.00 400.00
+384.00 400.00
+384.00 400.00
+320.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 576.00
+320.00 576.00
+384.00 576.00
+384.00 576.00
+320.00 576.00
+-1.00 -1.00
+5 0
+0
+ 0
+336.00 608.00
+336.00 618.00
+336.00 618.00
+336.00 618.00
+-1.00 -1.00
+1 1
+5 m_act
+ 0
+336.00 512.00
+336.00 522.00
+336.00 522.00
+336.00 522.00
+-1.00 -1.00
+1 1
+6 m_next
+ 3
+320.00 528.00
+320.00 528.00
+384.00 528.00
+384.00 528.00
+320.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 592.00
+384.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 624.00
+320.00 496.00
+384.00 496.00
+384.00 624.00
+320.00 624.00
+-1.00 -1.00
+5 0
+0
+ 3
+192.00 624.00
+192.00 496.00
+256.00 496.00
+256.00 624.00
+192.00 624.00
+-1.00 -1.00
+5 0
+0
+ 3
+192.00 592.00
+256.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+192.00 528.00
+192.00 528.00
+256.00 528.00
+256.00 528.00
+192.00 528.00
+-1.00 -1.00
+5 0
+0
+ 0
+208.00 512.00
+208.00 522.00
+208.00 522.00
+208.00 522.00
+-1.00 -1.00
+1 1
+6 m_next
+ 0
+208.00 608.00
+208.00 618.00
+208.00 618.00
+208.00 618.00
+-1.00 -1.00
+1 1
+5 m_act
+ 3
+192.00 576.00
+192.00 576.00
+256.00 576.00
+256.00 576.00
+192.00 576.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 400.00
+64.00 400.00
+128.00 400.00
+128.00 400.00
+64.00 400.00
+-1.00 -1.00
+5 0
+0
+ 0
+80.00 432.00
+80.00 442.00
+80.00 442.00
+80.00 442.00
+-1.00 -1.00
+1 1
+5 m_act
+ 0
+80.00 336.00
+80.00 346.00
+80.00 346.00
+80.00 346.00
+-1.00 -1.00
+1 1
+6 m_next
+ 3
+64.00 352.00
+64.00 352.00
+128.00 352.00
+128.00 352.00
+64.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 416.00
+128.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 448.00
+64.00 320.00
+128.00 320.00
+128.00 448.00
+64.00 448.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 576.00
+64.00 576.00
+128.00 576.00
+128.00 576.00
+64.00 576.00
+-1.00 -1.00
+5 0
+0
+ 0
+80.00 608.00
+80.00 618.00
+80.00 618.00
+80.00 618.00
+-1.00 -1.00
+1 1
+5 m_act
+ 0
+80.00 512.00
+80.00 522.00
+80.00 522.00
+80.00 522.00
+-1.00 -1.00
+1 1
+6 m_next
+ 3
+64.00 528.00
+64.00 528.00
+128.00 528.00
+128.00 528.00
+64.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 592.00
+128.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 624.00
+64.00 496.00
+128.00 496.00
+128.00 624.00
+64.00 624.00
+-1.00 -1.00
+5 0
+0
+ 0
+74.00 401.00
+74.00 411.00
+74.00 411.00
+74.00 411.00
+-1.00 -1.00
+1 1
+6 MT_EOT
+ 0
+207.00 577.00
+207.00 587.00
+207.00 587.00
+207.00 587.00
+-1.00 -1.00
+1 1
+6 MT_EOT
+ 0
+446.00 575.00
+446.00 585.00
+446.00 585.00
+446.00 585.00
+-1.00 -1.00
+1 1
+6 MT_EOT
+ 3
+80.00 576.00
+80.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+72.00 576.00
+72.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+88.00 576.00
+88.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+96.00 576.00
+96.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+104.00 576.00
+104.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+112.00 576.00
+112.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+120.00 576.00
+120.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+80.00 400.00
+80.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+72.00 400.00
+72.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+88.00 400.00
+88.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+96.00 400.00
+96.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+104.00 400.00
+104.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+112.00 400.00
+112.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+120.00 400.00
+120.00 376.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 552.00
+336.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 552.00
+328.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+344.00 552.00
+344.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+352.00 552.00
+352.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+360.00 552.00
+360.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+368.00 552.00
+368.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+376.00 552.00
+376.00 528.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 400.00
+336.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 400.00
+328.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+344.00 400.00
+344.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+352.00 400.00
+352.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+360.00 400.00
+360.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+368.00 400.00
+368.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+376.00 400.00
+376.00 352.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 208.00
+328.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 208.00
+336.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+352.00 208.00
+352.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+344.00 208.00
+344.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+360.00 208.00
+360.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+368.00 208.00
+368.00 192.00
+-1.00 -1.00
+1 0
+0
+ 3
+376.00 208.00
+376.00 192.00
+-1.00 -1.00
+1 0
+0
+ 0
+64.00 640.00
+64.00 650.00
+64.00 650.00
+64.00 650.00
+-1.00 -1.00
+1 1
+10 first TSDU
+ 0
+192.00 640.00
+192.00 650.00
+192.00 650.00
+192.00 650.00
+-1.00 -1.00
+1 1
+11 second TSDU
+ 0
+320.00 640.00
+320.00 650.00
+320.00 650.00
+320.00 650.00
+-1.00 -1.00
+1 1
+9 last TSDU
+ 0
+64.00 688.00
+64.00 698.00
+64.00 698.00
+64.00 698.00
+-1.00 -1.00
+1 1
+16 so->so_rcv.sb_mb
+ 3
+48.00 704.00
+48.00 672.00
+160.00 672.00
+160.00 704.00
+48.00 704.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 688.00
+32.00 688.00
+-1.00 -1.00
+5 0
+0
+ 3
+32.00 688.00
+32.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+32.00 608.00
+64.00 608.00
+-1.00 -1.00
+5 0
+0
+ 3
+64.00 608.00
+59.00 605.00
+61.00 608.00
+59.00 611.00
+64.00 608.00
+-1.00 -1.00
+5 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/mbufrcv.nr b/share/doc/iso/wisc/figs/mbufrcv.nr
new file mode 100644
index 0000000..af35c70
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufrcv.nr
@@ -0,0 +1,504 @@
+.(z
+.br
+.nr g1 2880u
+.nr g2 3250u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D't 1u'
+.sp -1
+.sp 557u
+\h'186u'\D'l -29u 18u'\D'l 12u -18u'\D'l -12u -17u'\D'l 29u 17u'
+.sp -1
+\D'l 186u 0u'
+.sp -1
+.sp -464u
+\D'l 0u 464u'
+.sp -1
+\h'93u'\D'l -93u 0u'
+.sp -1
+.sp -93u
+\h'93u'\D'l 0u 186u'\D'l 651u 0u'\D'l 0u -186u'\D'l -651u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "so->so_rcv.sb_mb
+.sp 93u
+\h'186u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "last TSDU
+.sp 371u
+\h'1673u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "second TSDU
+.sp 371u
+\h'929u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "first TSDU
+.sp 371u
+\h'186u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'
+.sp -1
+.sp 2879u
+\h'1998u'\D'l 0u 93u'
+.sp -1
+\h'1951u'\D'l 0u 93u'
+.sp -1
+\h'1905u'\D'l 0u 93u'
+.sp -1
+\h'1812u'\D'l 0u 93u'
+.sp -1
+\h'1858u'\D'l 0u 93u'
+.sp -1
+\h'1765u'\D'l 0u 93u'
+.sp -1
+\h'1719u'\D'l 0u 93u'
+.sp -1
+.sp -1115u
+\h'1998u'\D'l 0u 279u'
+.sp -1
+\h'1951u'\D'l 0u 279u'
+.sp -1
+\h'1905u'\D'l 0u 279u'
+.sp -1
+\h'1858u'\D'l 0u 279u'
+.sp -1
+\h'1812u'\D'l 0u 279u'
+.sp -1
+\h'1719u'\D'l 0u 279u'
+.sp -1
+\h'1765u'\D'l 0u 279u'
+.sp -1
+.sp -882u
+\h'1998u'\D'l 0u 140u'
+.sp -1
+\h'1951u'\D'l 0u 140u'
+.sp -1
+\h'1905u'\D'l 0u 140u'
+.sp -1
+\h'1858u'\D'l 0u 140u'
+.sp -1
+\h'1812u'\D'l 0u 140u'
+.sp -1
+\h'1719u'\D'l 0u 140u'
+.sp -1
+\h'1765u'\D'l 0u 140u'
+.sp -1
+.sp 882u
+\h'511u'\D'l 0u 139u'
+.sp -1
+\h'465u'\D'l 0u 139u'
+.sp -1
+\h'418u'\D'l 0u 139u'
+.sp -1
+\h'372u'\D'l 0u 139u'
+.sp -1
+\h'325u'\D'l 0u 139u'
+.sp -1
+\h'233u'\D'l 0u 139u'
+.sp -1
+\h'279u'\D'l 0u 139u'
+.sp -1
+.sp -1021u
+\h'511u'\D'l 0u 279u'
+.sp -1
+\h'465u'\D'l 0u 279u'
+.sp -1
+\h'418u'\D'l 0u 279u'
+.sp -1
+\h'372u'\D'l 0u 279u'
+.sp -1
+\h'325u'\D'l 0u 279u'
+.sp -1
+\h'233u'\D'l 0u 279u'
+.sp -1
+\h'279u'\D'l 0u 279u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_EOT
+.sp 6u
+\h'2404u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_EOT
+.sp -6u
+\h'1016u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_EOT
+.sp 1015u
+\h'244u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp -279u
+\h'186u'\D'l 0u 743u'\D'l 372u 0u'\D'l 0u -743u'\D'l -372u 0u'
+.sp -1
+.sp 186u
+\h'186u'\D'l 372u 0u'
+.sp -1
+.sp 372u
+\h'186u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 93u
+\h'279u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -465u
+\h'279u'\&\*(g9
+.sp |\n(g8u
+.sp -279u
+\h'186u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.sp 742u
+\h'186u'\D'l 0u 744u'\D'l 372u 0u'\D'l 0u -744u'\D'l -372u 0u'
+.sp -1
+.sp 186u
+\h'186u'\D'l 372u 0u'
+.sp -1
+.sp 372u
+\h'186u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 93u
+\h'279u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -465u
+\h'279u'\&\*(g9
+.sp |\n(g8u
+.sp -279u
+\h'186u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.sp -1021u
+\h'929u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -186u
+\h'1022u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 372u
+\h'1022u'\&\*(g9
+.sp |\n(g8u
+.sp 279u
+\h'929u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.sp -372u
+\h'929u'\D'l 372u 0u'
+.sp -1
+.sp -186u
+\h'929u'\D'l 0u 743u'\D'l 372u 0u'\D'l 0u -743u'\D'l -372u 0u'
+.sp -1
+\h'1673u'\D'l 0u 743u'\D'l 371u 0u'\D'l 0u -743u'\D'l -371u 0u'
+.sp -1
+.sp 186u
+\h'1673u'\D'l 371u 0u'
+.sp -1
+.sp 372u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 93u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -465u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.sp -279u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.sp 1021u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -186u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 372u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.sp 279u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.sp -372u
+\h'1673u'\D'l 371u 0u'
+.sp -1
+.sp -186u
+\h'1673u'\D'l 0u 744u'\D'l 371u 0u'\D'l 0u -744u'\D'l -371u 0u'
+.sp -1
+.sp 1022u
+\h'1673u'\D'l 0u 743u'\D'l 371u 0u'\D'l 0u -743u'\D'l -371u 0u'
+.sp -1
+.sp 186u
+\h'1673u'\D'l 371u 0u'
+.sp -1
+.sp 372u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 93u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -465u
+\h'1765u'\&\*(g9
+.sp |\n(g8u
+.sp -279u
+\h'1673u'\D'l 0u 0u'\D'l 371u 0u'\D'l 0u 0u'\D'l -371u 0u'
+.sp -1
+.sp -2043u
+\h'2323u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -186u
+\h'2416u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 372u
+\h'2416u'\&\*(g9
+.sp |\n(g8u
+.sp 279u
+\h'2323u'\D'l 0u 0u'\D'l 372u 0u'\D'l 0u 0u'\D'l -372u 0u'
+.sp -1
+.sp -372u
+\h'2323u'\D'l 372u 0u'
+.sp -1
+.sp -186u
+\h'2323u'\D'l 0u 743u'\D'l 372u 0u'\D'l 0u -743u'\D'l -372u 0u'
+.sp -1
+.sp 93u
+\h'558u'\D'l 371u 0u'
+.sp -1
+\h'1301u'\D'l 372u 0u'
+.sp -1
+\h'929u'\D'l -29u 18u'\D'l 12u -18u'\D'l -12u -17u'\D'l 29u 17u'
+.sp -1
+\h'1673u'\D'l -29u 18u'\D'l 11u -18u'\D'l -11u -17u'\D'l 29u 17u'
+.sp -1
+\h'2044u'\D'l 279u 0u'
+.sp -1
+\h'2323u'\D'l -29u 18u'\D'l 11u -18u'\D'l -11u -17u'\D'l 29u 17u'
+.sp -1
+.sp 558u
+\h'558u'\D'l 186u 0u'
+.sp -1
+\h'2044u'\D'l 186u 0u'
+.sp -1
+\h'744u'\D'l 0u 277u'
+.sp -1
+.sp 277u
+\h'744u'\D'l -372u 0u'
+.sp -1
+\h'372u'\D'l 0u 93u'
+.sp -1
+.sp 93u
+\h'372u'\D'l -17u -29u'\D'l 17u 12u'\D'l 17u -12u'\D'l -17u 29u'
+.sp -1
+.sp -370u
+\h'2230u'\D'l 0u 277u'
+.sp -1
+.sp 277u
+\h'2230u'\D'l -372u 0u'
+.sp -1
+\h'1858u'\D'l 0u 93u'
+.sp -1
+.sp 93u
+\h'1858u'\D'l -17u -29u'\D'l 17u 12u'\D'l 18u -12u'\D'l -18u 29u'
+.sp -1
+.sp 651u
+\h'2044u'\D'l 186u 0u'
+.sp -1
+\h'2230u'\D'l 0u 278u'
+.sp -1
+.sp 278u
+\h'2230u'\D'l -372u 0u'
+.sp -1
+\h'1858u'\D'l 0u 93u'
+.sp -1
+.sp 93u
+\h'1858u'\D'l -17u -29u'\D'l 17u 12u'\D'l 18u -12u'\D'l -18u 29u'
+.sp -1
+.sp -1950u
+\h'2695u'\D'l 92u 0u'
+.sp -1
+.sp -93u
+\h'2787u'\D'l 0u 186u'
+.sp -1
+\h'2880u'\D'l 0u 186u'
+.sp -1
+.sp 558u
+\h'1487u'\D'l 0u 185u'
+.sp -1
+\h'1580u'\D'l 0u 185u'
+.sp -1
+.sp 2136u
+\h'2044u'\D'l 186u 0u'
+.sp -1
+.sp -93u
+\h'2230u'\D'l 0u 185u'
+.sp -1
+\h'2323u'\D'l 0u 185u'
+.sp -1
+.sp -1858u
+\h'1487u'\D'l 93u -185u'
+.sp -1
+.sp -557u
+\h'2787u'\D'l 93u -186u'
+.sp -1
+.sp 2600u
+\h'2230u'\D'l 93u -185u'
+.sp -1
+.sp -1114u
+\h'558u'\D'l 186u 0u'
+.sp -1
+.sp -93u
+\h'744u'\D'l 0u 186u'
+.sp -1
+\h'836u'\D'l 0u 186u'
+.sp -1
+.sp 186u
+\h'744u'\D'l 92u -186u'
+.sp -1
+.sp -1114u
+\h'2695u'\D'l 92u 0u'
+.sp -1
+.sp -93u
+\h'2787u'\D'l 0u 185u'\D'l 93u -185u'\D'l 0u 185u'
+.sp -1
+.sp 93u
+\h'1301u'\D'l 186u 0u'
+.sp -1
+.sp 556u
+\h'744u'\D'l 92u -186u'
+.sp -1
+.sp -186u
+\h'836u'\D'l 0u 186u'
+.sp -1
+\h'744u'\D'l 0u 186u'
+.sp -1
+.sp 93u
+\h'558u'\D'l 186u 0u'
+.sp -1
+\h'2044u'\D'l 186u 0u'
+.sp -1
+.sp -93u
+\h'2230u'\D'l 0u 186u'
+.sp -1
+\h'2323u'\D'l 0u 186u'
+.sp -1
+.sp 186u
+\h'2230u'\D'l 93u -186u'
+.sp -1
+.sp 1022u
+\h'2230u'\D'l 93u -186u'
+.sp -1
+.sp -186u
+\h'2323u'\D'l 0u 186u'
+.sp -1
+\h'2230u'\D'l 0u 186u'
+.sp -1
+.sp 93u
+\h'2044u'\D'l 186u 0u'
+.sp -1
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_DATA
+.sp -1857u
+\h'233u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_DATA
+.sp -1857u
+\h'1719u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_DATA
+.sp -836u
+\h'1719u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 5
+.nr g8 \n(.d
+.ds g9 "MT_DATA
+.sp 186u
+\h'1719u'\&\*(g9
+.sp |\n(g8u
+.sp 650u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG\fR: \fImbuf\fR chains on socket receive buffer
+.)z
diff --git a/share/doc/iso/wisc/figs/mbufsnd.grn b/share/doc/iso/wisc/figs/mbufsnd.grn
new file mode 100644
index 0000000..9b7ac5e
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufsnd.grn
@@ -0,0 +1,13 @@
+.(z
+.GS C
+width 5.0
+high 6.0
+narrow 1
+medium 3
+thick 7
+pointscale on
+file mbufsnd.gsrc
+.GE
+.ce
+\fB Figure \n+(FG\fR: \fImbuf\fR chains on socket send buffer
+.)z
diff --git a/share/doc/iso/wisc/figs/mbufsnd.gsrc b/share/doc/iso/wisc/figs/mbufsnd.gsrc
new file mode 100644
index 0000000..8e2f0a8
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufsnd.gsrc
@@ -0,0 +1,534 @@
+gremlinfile
+0 124.00 410.00
+0
+124.00 310.00
+124.00 320.00
+124.00 320.00
+124.00 320.00
+-1.00 -1.00
+1 1
+12 == user data
+ 3
+71.00 343.00
+71.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+79.00 343.00
+79.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+87.00 343.00
+87.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+95.00 343.00
+95.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+103.00 343.00
+103.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+111.00 343.00
+111.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+119.00 343.00
+119.00 295.00
+-1.00 -1.00
+1 0
+0
+ 3
+160.00 688.00
+256.00 688.00
+160.00 688.00
+256.00 688.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 512.00
+352.00 512.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 528.00
+352.00 496.00
+368.00 528.00
+368.00 496.00
+-1.00 -1.00
+5 0
+0
+ 0
+264.00 656.00
+264.00 666.00
+264.00 666.00
+264.00 666.00
+-1.00 -1.00
+1 1
+7 MT_DATA
+ 3
+256.00 480.00
+320.00 480.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 480.00
+272.00 490.00
+272.00 490.00
+272.00 490.00
+-1.00 -1.00
+1 1
+6 MT_XPD
+ 3
+312.00 304.00
+312.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+304.00 304.00
+304.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+296.00 304.00
+296.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+288.00 304.00
+288.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+280.00 304.00
+280.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+264.00 304.00
+264.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+272.00 304.00
+272.00 256.00
+-1.00 -1.00
+1 0
+0
+ 3
+256.00 304.00
+256.00 304.00
+320.00 304.00
+320.00 304.00
+256.00 304.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 256.00
+256.00 256.00
+320.00 256.00
+320.00 256.00
+256.00 256.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 352.00
+285.00 357.00
+288.00 355.00
+291.00 357.00
+288.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 416.00
+352.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 416.00
+352.00 368.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 352.00
+256.00 224.00
+320.00 224.00
+320.00 352.00
+256.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 368.00
+288.00 368.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 368.00
+288.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 704.00
+352.00 672.00
+368.00 704.00
+368.00 672.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 688.00
+352.00 688.00
+-1.00 -1.00
+5 0
+0
+ 0
+64.00 688.00
+64.00 698.00
+64.00 698.00
+64.00 698.00
+-1.00 -1.00
+1 1
+16 so->so_snd.sb_mb
+ 3
+256.00 688.00
+251.00 685.00
+253.00 688.00
+251.00 691.00
+256.00 688.00
+-1.00 -1.00
+5 0
+0
+ 3
+48.00 704.00
+48.00 672.00
+160.00 672.00
+160.00 704.00
+48.00 704.00
+-1.00 -1.00
+5 0
+0
+ 3
+312.00 656.00
+312.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+304.00 656.00
+304.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+296.00 656.00
+296.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+288.00 656.00
+288.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+280.00 656.00
+280.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+264.00 656.00
+264.00 608.00
+-1.00 -1.00
+1 0
+0
+ 3
+272.00 656.00
+272.00 608.00
+-1.00 -1.00
+1 0
+0
+ 0
+271.00 305.00
+271.00 315.00
+271.00 315.00
+271.00 315.00
+-1.00 -1.00
+1 1
+6 MT_EOT
+ 3
+256.00 704.00
+256.00 576.00
+320.00 576.00
+320.00 704.00
+256.00 704.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 672.00
+320.00 672.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 608.00
+256.00 608.00
+320.00 608.00
+320.00 608.00
+256.00 608.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 592.00
+272.00 602.00
+272.00 602.00
+272.00 602.00
+-1.00 -1.00
+1 1
+6 m_next
+ 0
+272.00 688.00
+272.00 698.00
+272.00 698.00
+272.00 698.00
+-1.00 -1.00
+1 1
+5 m_act
+ 3
+256.00 656.00
+256.00 656.00
+320.00 656.00
+320.00 656.00
+256.00 656.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 528.00
+256.00 400.00
+320.00 400.00
+320.00 528.00
+256.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 496.00
+320.00 496.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 432.00
+256.00 432.00
+320.00 432.00
+320.00 432.00
+256.00 432.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 416.00
+272.00 426.00
+272.00 426.00
+272.00 426.00
+-1.00 -1.00
+1 1
+6 m_next
+ 0
+272.00 512.00
+272.00 522.00
+272.00 522.00
+272.00 522.00
+-1.00 -1.00
+1 1
+5 m_act
+ 3
+256.00 304.00
+256.00 304.00
+320.00 304.00
+320.00 304.00
+256.00 304.00
+-1.00 -1.00
+5 0
+0
+ 0
+272.00 336.00
+272.00 346.00
+272.00 346.00
+272.00 346.00
+-1.00 -1.00
+1 1
+5 m_act
+ 0
+272.00 240.00
+272.00 250.00
+272.00 250.00
+272.00 250.00
+-1.00 -1.00
+1 1
+6 m_next
+ 3
+256.00 256.00
+256.00 256.00
+320.00 256.00
+320.00 256.00
+256.00 256.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 320.00
+320.00 320.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 352.00
+256.00 224.00
+320.00 224.00
+320.00 352.00
+256.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 592.00
+352.00 592.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 592.00
+352.00 544.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 544.00
+288.00 544.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 544.00
+288.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 528.00
+285.00 533.00
+288.00 531.00
+291.00 533.00
+288.00 528.00
+-1.00 -1.00
+5 0
+0
+ 3
+320.00 240.00
+352.00 240.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 256.00
+352.00 224.00
+-1.00 -1.00
+5 0
+0
+ 3
+368.00 256.00
+368.00 224.00
+-1.00 -1.00
+5 0
+0
+ 3
+352.00 224.00
+368.00 256.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 472.00
+288.00 464.00
+-1.00 -1.00
+5 0
+0
+ 0
+72.00 480.00
+72.00 490.00
+72.00 490.00
+72.00 490.00
+-1.00 -1.00
+1 1
+13 tpcb->tp_Xuna
+ 0
+72.00 464.00
+72.00 474.00
+72.00 474.00
+72.00 474.00
+-1.00 -1.00
+1 1
+18 sequence number of
+ 0
+72.00 448.00
+72.00 458.00
+72.00 458.00
+72.00 458.00
+-1.00 -1.00
+1 1
+8 XPD TPDU
+ 3
+288.00 464.00
+208.00 456.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 472.00
+208.00 480.00
+208.00 480.00
+-1.00 -1.00
+5 0
+0
+ 3
+208.00 488.00
+184.00 472.00
+208.00 448.00
+-1.00 -1.00
+5 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/mbufsnd.nr b/share/doc/iso/wisc/figs/mbufsnd.nr
new file mode 100644
index 0000000..4b38574
--- /dev/null
+++ b/share/doc/iso/wisc/figs/mbufsnd.nr
@@ -0,0 +1,284 @@
+.(z
+.br
+.nr g1 2304u
+.nr g2 3455u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D't 1u'
+.sp -1
+.sp 1554u
+\h'1152u'\D'l -173u 115u'\D'l 173u 173u'
+.sp -1
+.sp 115u
+\h'1728u'\D'l -576u -57u'\D'l 0u 0u'
+.sp -1
+.sp 58u
+\h'1728u'\D'l -576u 57u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "XPD TPDU
+.sp 115u
+\h'173u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "sequence number of
+\h'173u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "tpcb->tp_Xuna
+.sp -115u
+\h'173u'\&\*(g9
+.sp |\n(g8u
+.sp -58u
+\h'1728u'\D'l 0u 58u'
+.sp -1
+.sp 1786u
+\h'2189u'\D'l 115u -231u'
+.sp -1
+.sp -231u
+\h'2304u'\D'l 0u 231u'
+.sp -1
+\h'2189u'\D'l 0u 231u'
+.sp -1
+.sp 116u
+\h'1959u'\D'l 230u 0u'
+.sp -1
+.sp -2073u
+\h'1728u'\D'l -21u -36u'\D'l 21u 14u'\D'l 22u -14u'\D'l -22u 36u'
+.sp -1
+.sp -115u
+\h'1728u'\D'l 0u 115u'
+.sp -1
+\h'2189u'\D'l -461u 0u'
+.sp -1
+.sp -346u
+\h'2189u'\D'l 0u 346u'
+.sp -1
+\h'1959u'\D'l 230u 0u'
+.sp -1
+.sp 1727u
+\h'1498u'\D'l 0u 922u'\D'l 461u 0u'\D'l 0u -922u'\D'l -461u 0u'
+.sp -1
+.sp 231u
+\h'1498u'\D'l 461u 0u'
+.sp -1
+.sp 460u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 116u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -576u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.sp -345u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -1497u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp -807u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.sp -922u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.sp -461u
+\h'1498u'\D'l 461u 0u'
+.sp -1
+.sp -229u
+\h'1498u'\D'l 0u 921u'\D'l 461u 0u'\D'l 0u -921u'\D'l -461u 0u'
+.sp -1
+.sp -922u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_act
+.sp -230u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "m_next
+.sp 461u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+.sp 346u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.sp -461u
+\h'1498u'\D'l 461u 0u'
+.sp -1
+.sp -230u
+\h'1498u'\D'l 0u 922u'\D'l 461u 0u'\D'l 0u -922u'\D'l -461u 0u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "MT_EOT
+.sp 2872u
+\h'1606u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'
+.sp -1
+.sp 345u
+\h'1613u'\D'l 0u 346u'
+.sp -1
+\h'1555u'\D'l 0u 346u'
+.sp -1
+\h'1670u'\D'l 0u 346u'
+.sp -1
+\h'1728u'\D'l 0u 346u'
+.sp -1
+\h'1786u'\D'l 0u 346u'
+.sp -1
+\h'1843u'\D'l 0u 346u'
+.sp -1
+\h'1901u'\D'l 0u 346u'
+.sp -1
+\D's -1u'
+.sp -1
+.sp -345u
+\D'l 0u 230u'\D'l 807u 0u'\D'l 0u -230u'\D'l -807u 0u'
+.sp -1
+.sp 115u
+\h'1498u'\D'l -36u 22u'\D'l 14u -22u'\D'l -14u -21u'\D'l 36u 21u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "so->so_snd.sb_mb
+\h'115u'\&\*(g9
+.sp |\n(g8u
+\h'1959u'\D'l 230u 0u'
+.sp -1
+.sp -115u
+\h'2189u'\D'l 0u 230u'\D'l 115u -230u'\D'l 0u 230u'
+.sp -1
+.sp 2418u
+\h'1728u'\D'l 0u 115u'
+.sp -1
+\h'2189u'\D'l -461u 0u'
+.sp -1
+.sp 115u
+\h'1498u'\D'l 0u 922u'\D'l 461u 0u'\D'l 0u -922u'\D'l -461u 0u'
+.sp -1
+.sp -461u
+\h'2189u'\D'l 0u 346u'
+.sp -1
+\h'1959u'\D'l 230u 0u'
+.sp -1
+.sp 461u
+\h'1728u'\D'l -21u -36u'\D'l 21u 15u'\D'l 22u -15u'\D'l -22u 36u'
+.sp -1
+.sp 691u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+.sp -345u
+\h'1498u'\D'l 0u 0u'\D'l 461u 0u'\D'l 0u 0u'\D'l -461u 0u'
+.sp -1
+\D's 4u'
+.sp -1
+\h'1613u'\D'l 0u 345u'
+.sp -1
+\h'1555u'\D'l 0u 345u'
+.sp -1
+\h'1670u'\D'l 0u 345u'
+.sp -1
+\h'1728u'\D'l 0u 345u'
+.sp -1
+\h'1786u'\D'l 0u 345u'
+.sp -1
+\h'1843u'\D'l 0u 345u'
+.sp -1
+\h'1901u'\D'l 0u 345u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "MT_XPD
+.sp -1267u
+\h'1613u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp -1267u
+\h'1498u'\D'l 461u 0u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "MT_DATA
+.sp -1267u
+\h'1555u'\&\*(g9
+.sp |\n(g8u
+.sp -345u
+\h'2189u'\D'l 0u 229u'\D'l 115u -229u'\D'l 0u 229u'
+.sp -1
+.sp 115u
+\h'1959u'\D'l 230u 0u'
+.sp -1
+.sp -1267u
+\h'807u'\D'l 691u 0u'\D'l -691u 0u'\D'l 691u 0u'
+.sp -1
+\D's 4u'
+.sp -1
+.sp 2483u
+\h'511u'\D'l 0u 346u'
+.sp -1
+\h'454u'\D'l 0u 346u'
+.sp -1
+\h'396u'\D'l 0u 346u'
+.sp -1
+\h'338u'\D'l 0u 346u'
+.sp -1
+\h'281u'\D'l 0u 346u'
+.sp -1
+\h'223u'\D'l 0u 346u'
+.sp -1
+\h'166u'\D'l 0u 346u'
+.sp -1
+.ft R
+.ps 6
+.nr g8 \n(.d
+.ds g9 "== user data
+.sp 238u
+\h'547u'\&\*(g9
+.sp |\n(g8u
+.sp 857u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG\fR: \fImbuf\fR chains on socket send buffer
+.)z
diff --git a/share/doc/iso/wisc/figs/osi_addr.grn b/share/doc/iso/wisc/figs/osi_addr.grn
new file mode 100644
index 0000000..333260b
--- /dev/null
+++ b/share/doc/iso/wisc/figs/osi_addr.grn
@@ -0,0 +1,18 @@
+.(z
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.4
+narrow 1
+medium 3
+thick 7
+pointscale off
+file osi_addr.gsrc
+.GE
+.ce
+\fB Figure \n+(FG\fR: Format of OSI addresses
+.)z
diff --git a/share/doc/iso/wisc/figs/osi_addr.gsrc b/share/doc/iso/wisc/figs/osi_addr.gsrc
new file mode 100644
index 0000000..0a69b96
--- /dev/null
+++ b/share/doc/iso/wisc/figs/osi_addr.gsrc
@@ -0,0 +1,62 @@
+gremlinfile
+0 87.01 78.31
+3
+87.01 641.69
+87.01 567.61
+349.25 567.61
+349.25 641.69
+87.01 641.69
+-1.00 -1.00
+5 0
+0
+ 0
+138.15 617.43
+138.15 636.43
+138.15 636.43
+138.15 636.43
+-1.00 -1.00
+1 4
+3 IDP
+ 3
+212.23 641.69
+212.23 567.61
+-1.00 -1.00
+5 0
+0
+ 3
+87.01 609.57
+212.23 609.57
+-1.00 -1.00
+5 0
+0
+ 0
+98.81 585.31
+98.81 604.31
+98.81 604.31
+98.81 604.31
+-1.00 -1.00
+1 4
+3 AFI
+ 3
+149.29 610.22
+149.29 567.61
+-1.00 -1.00
+5 0
+0
+ 0
+170.27 586.62
+170.27 605.62
+170.27 605.62
+170.27 605.62
+-1.00 -1.00
+1 4
+3 IDI
+ 0
+271.23 598.42
+271.23 617.42
+271.23 617.42
+271.23 617.42
+-1.00 -1.00
+1 4
+3 DSP
+ -1
diff --git a/share/doc/iso/wisc/figs/osi_addr.nr b/share/doc/iso/wisc/figs/osi_addr.nr
new file mode 100644
index 0000000..f4c88fa
--- /dev/null
+++ b/share/doc/iso/wisc/figs/osi_addr.nr
@@ -0,0 +1,59 @@
+.(z
+.br
+.nr g1 3456u
+.nr g2 1722u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+.ft R
+.ps 14
+.nr g8 \n(.d
+.ds g9 "DSP
+.sp 570u
+\h'2428u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 14
+.nr g8 \n(.d
+.ds g9 "IDI
+.sp 726u
+\h'1097u'\&\*(g9
+.sp |\n(g8u
+\D't 1u'
+.sp -1
+.sp 415u
+\h'821u'\D'l 0u 561u'
+.sp -1
+.ft R
+.ps 14
+.nr g8 \n(.d
+.ds g9 "AFI
+.sp 328u
+\h'156u'\&\*(g9
+.sp |\n(g8u
+.sp 8u
+\D'l 1650u 0u'
+.sp -1
+.sp -423u
+\h'1650u'\D'l 0u 976u'
+.sp -1
+.ft R
+.ps 14
+.nr g8 \n(.d
+.ds g9 "IDP
+.sp 320u
+\h'674u'\&\*(g9
+.sp |\n(g8u
+\D'l 0u 976u'\D'l 3456u 0u'\D'l 0u -976u'\D'l -3456u 0u'
+.sp -1
+.sp 1722u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG\fR: Format of OSI addresses
+.)z
diff --git a/share/doc/iso/wisc/figs/tppt.grn b/share/doc/iso/wisc/figs/tppt.grn
new file mode 100644
index 0000000..649c3d9
--- /dev/null
+++ b/share/doc/iso/wisc/figs/tppt.grn
@@ -0,0 +1,18 @@
+.(z
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.4
+narrow 1
+medium 3
+thick 7
+pointscale off
+file tppt.gsrc
+.GE
+.ce
+\fB Figure \n+(FG\fR: Output of tppt(8)
+.)z
diff --git a/share/doc/iso/wisc/figs/tppt.gsrc b/share/doc/iso/wisc/figs/tppt.gsrc
new file mode 100644
index 0000000..5643940
--- /dev/null
+++ b/share/doc/iso/wisc/figs/tppt.gsrc
@@ -0,0 +1,411 @@
+gremlinfile
+0 352.00 352.00
+0
+352.00 368.00
+352.00 381.00
+352.00 381.00
+352.00 381.00
+-1.00 -1.00
+2 2
+17 this is a CR TPDU
+ 3
+256.00 384.00
+256.00 400.00
+368.00 400.00
+368.00 384.00
+256.00 384.00
+-1.00 -1.00
+5 0
+0
+ 0
+112.00 288.00
+112.00 302.00
+112.00 302.00
+112.00 302.00
+-1.00 -1.00
+1 3
+59 +12: 0x02 0x00 0x07 0xc0 20: 0x01 0x08 0x00 0x00
+ 0
+112.00 304.00
+112.00 318.00
+112.00 318.00
+112.00 318.00
+-1.00 -1.00
+1 3
+59 + 8: 0x06 0x74 0x70 0x70 12: 0x69 0x6e 0xc7 0xc2
+ 0
+112.00 320.00
+112.00 334.00
+112.00 334.00
+112.00 334.00
+-1.00 -1.00
+1 3
+59 + 0: 0x15 0xe0 0x00 0x00 4: 0x00 0x03 0x00 0xc1
+ 0
+160.00 208.00
+160.00 221.00
+160.00 221.00
+160.00 221.00
+-1.00 -1.00
+2 2
+17 class and options
+ 3
+112.00 208.00
+144.00 208.00
+-1.00 -1.00
+6 0
+0
+ 3
+336.00 320.00
+368.00 320.00
+-1.00 -1.00
+6 0
+0
+ 3
+80.00 352.00
+75.00 349.00
+77.00 352.00
+75.00 355.00
+80.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+96.00 288.00
+91.00 285.00
+93.00 288.00
+91.00 291.00
+96.00 288.00
+-1.00 -1.00
+5 0
+0
+ 3
+96.00 288.00
+48.00 288.00
+48.00 240.00
+96.00 240.00
+-1.00 -1.00
+5 0
+0
+ 0
+336.00 432.00
+336.00 445.00
+336.00 445.00
+336.00 445.00
+-1.00 -1.00
+2 2
+22 indicates a TPDU event
+ 0
+48.00 448.00
+48.00 461.00
+48.00 461.00
+48.00 461.00
+-1.00 -1.00
+2 2
+18 TPDU was received;
+ 0
+128.00 448.00
+128.00 461.00
+128.00 461.00
+128.00 461.00
+-1.00 -1.00
+2 2
+28 its total length is 22 bytes
+ 0
+48.00 432.00
+48.00 445.00
+48.00 445.00
+48.00 445.00
+-1.00 -1.00
+2 2
+26 and its header is 22 bytes
+ 0
+48.00 416.00
+48.00 429.00
+48.00 429.00
+48.00 429.00
+-1.00 -1.00
+2 2
+15 (21 in the LI +
+ 0
+112.00 416.00
+112.00 429.00
+112.00 429.00
+112.00 429.00
+-1.00 -1.00
+2 2
+13 1 for the LI)
+ 3
+112.00 240.00
+144.00 240.00
+-1.00 -1.00
+1 0
+0
+ 0
+160.00 240.00
+160.00 253.00
+160.00 253.00
+160.00 253.00
+-1.00 -1.00
+2 2
+2 LI
+ 3
+208.00 320.00
+336.00 320.00
+-1.00 -1.00
+2 0
+0
+ 3
+112.00 224.00
+144.00 224.00
+-1.00 -1.00
+2 0
+0
+ 0
+160.00 224.00
+160.00 237.00
+160.00 237.00
+160.00 237.00
+-1.00 -1.00
+2 2
+16 dst-ref, src-ref
+ 3
+304.00 240.00
+336.00 240.00
+-1.00 -1.00
+3 0
+0
+ 0
+352.00 240.00
+352.00 253.00
+352.00 253.00
+352.00 253.00
+-1.00 -1.00
+2 2
+26 calling transport selector
+ 3
+144.00 288.00
+224.00 288.00
+-1.00 -1.00
+4 0
+0
+ 3
+304.00 224.00
+336.00 224.00
+-1.00 -1.00
+4 0
+0
+ 0
+352.00 224.00
+352.00 237.00
+352.00 237.00
+352.00 237.00
+-1.00 -1.00
+2 2
+25 called transport selector
+ 3
+240.00 288.00
+336.00 288.00
+-1.00 -1.00
+5 0
+0
+ 3
+304.00 208.00
+336.00 208.00
+-1.00 -1.00
+5 0
+0
+ 0
+352.00 208.00
+352.00 221.00
+352.00 221.00
+352.00 221.00
+-1.00 -1.00
+2 2
+9 TPDU size
+ 3
+144.00 320.00
+192.00 320.00
+-1.00 -1.00
+1 0
+0
+ 0
+176.00 240.00
+176.00 253.00
+176.00 253.00
+176.00 253.00
+-1.00 -1.00
+2 2
+11 , TPDU type
+ 3
+400.00 432.00
+400.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+400.00 400.00
+397.00 405.00
+400.00 403.00
+403.00 405.00
+400.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+368.00 400.00
+368.00 384.00
+432.00 384.00
+432.00 400.00
+368.00 400.00
+-1.00 -1.00
+5 0
+0
+ 0
+384.00 384.00
+384.00 398.00
+384.00 398.00
+384.00 398.00
+-1.00 -1.00
+1 3
+4 tpdu
+ 3
+80.00 352.00
+48.00 352.00
+48.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+96.00 336.00
+96.00 272.00
+416.00 272.00
+416.00 336.00
+96.00 336.00
+-1.00 -1.00
+5 0
+0
+ 3
+80.00 368.00
+80.00 336.00
+224.00 336.00
+224.00 368.00
+80.00 368.00
+-1.00 -1.00
+5 0
+0
+ 0
+96.00 352.00
+96.00 366.00
+96.00 366.00
+96.00 366.00
+-1.00 -1.00
+1 3
+19 INPUT total len 22
+ 0
+96.00 336.00
+96.00 350.00
+96.00 350.00
+96.00 350.00
+-1.00 -1.00
+1 3
+13 HDRLEN: 21+1
+ 3
+224.00 352.00
+224.00 336.00
+320.00 336.00
+320.00 352.00
+224.00 352.00
+-1.00 -1.00
+5 0
+0
+ 0
+240.00 336.00
+240.00 350.00
+240.00 350.00
+240.00 350.00
+-1.00 -1.00
+1 3
+12 CR_TPDU_type
+ 0
+336.00 336.00
+336.00 350.00
+336.00 350.00
+336.00 350.00
+-1.00 -1.00
+1 3
+23 cdt 0(0x0) dref 0x0
+ 3
+288.00 352.00
+285.00 357.00
+288.00 355.00
+291.00 357.00
+288.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 352.00
+288.00 368.00
+352.00 368.00
+-1.00 -1.00
+5 0
+0
+ 0
+80.00 144.00
+80.00 158.00
+80.00 158.00
+80.00 158.00
+-1.00 -1.00
+1 3
+24 1a: Ref 22 arg 14(0xe)
+ 0
+256.00 384.00
+256.00 398.00
+256.00 398.00
+256.00 398.00
+-1.00 -1.00
+1 3
+22 @ 91990 : 0000.435125
+ 3
+288.00 400.00
+285.00 405.00
+288.00 403.00
+291.00 405.00
+288.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+288.00 400.00
+288.00 416.00
+-1.00 -1.00
+5 0
+0
+ 0
+240.00 416.00
+240.00 429.00
+240.00 429.00
+240.00 429.00
+-1.00 -1.00
+2 2
+30 event # : time since 1st event
+ 3
+368.00 320.00
+384.00 320.00
+-1.00 -1.00
+3 0
+0
+ 3
+144.00 304.00
+336.00 304.00
+-1.00 -1.00
+3 0
+0
+ 3
+336.00 304.00
+384.00 304.00
+-1.00 -1.00
+4 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/tppt.gsrc.save b/share/doc/iso/wisc/figs/tppt.gsrc.save
new file mode 100644
index 0000000..3ad56ef
--- /dev/null
+++ b/share/doc/iso/wisc/figs/tppt.gsrc.save
@@ -0,0 +1,335 @@
+gremlinfile
+0 240.00 464.00
+0
+240.00 256.00
+240.00 269.00
+240.00 269.00
+240.00 269.00
+-1.00 -1.00
+2 2
+11 , TPDU type
+ 3
+208.00 320.00
+256.00 320.00
+-1.00 -1.00
+1 0
+0
+ 0
+224.00 192.00
+224.00 205.00
+224.00 205.00
+224.00 205.00
+-1.00 -1.00
+2 2
+9 TPDU size
+ 3
+176.00 192.00
+208.00 192.00
+-1.00 -1.00
+5 0
+0
+ 3
+304.00 288.00
+400.00 288.00
+-1.00 -1.00
+5 0
+0
+ 0
+224.00 208.00
+224.00 221.00
+224.00 221.00
+224.00 221.00
+-1.00 -1.00
+2 2
+25 called transport selector
+ 3
+176.00 208.00
+208.00 208.00
+-1.00 -1.00
+4 0
+0
+ 3
+208.00 288.00
+288.00 288.00
+-1.00 -1.00
+4 0
+0
+ 3
+432.00 304.00
+464.00 304.00
+-1.00 -1.00
+4 0
+0
+ 3
+160.00 336.00
+160.00 272.00
+480.00 272.00
+480.00 336.00
+160.00 336.00
+-1.00 -1.00
+5 0
+0
+ 0
+224.00 224.00
+224.00 237.00
+224.00 237.00
+224.00 237.00
+-1.00 -1.00
+2 2
+26 calling transport selector
+ 3
+176.00 224.00
+208.00 224.00
+-1.00 -1.00
+3 0
+0
+ 3
+208.00 304.00
+432.00 304.00
+-1.00 -1.00
+3 0
+0
+ 3
+432.00 320.00
+464.00 320.00
+-1.00 -1.00
+3 0
+0
+ 0
+224.00 240.00
+224.00 253.00
+224.00 253.00
+224.00 253.00
+-1.00 -1.00
+2 2
+16 dst-ref, src-ref
+ 3
+176.00 240.00
+208.00 240.00
+-1.00 -1.00
+2 0
+0
+ 3
+272.00 320.00
+400.00 320.00
+-1.00 -1.00
+2 0
+0
+ 0
+224.00 256.00
+224.00 269.00
+224.00 269.00
+224.00 269.00
+-1.00 -1.00
+2 2
+2 LI
+ 3
+176.00 256.00
+208.00 256.00
+-1.00 -1.00
+1 0
+0
+ 0
+48.00 416.00
+48.00 429.00
+48.00 429.00
+48.00 429.00
+-1.00 -1.00
+2 2
+13 1 for the LI)
+ 0
+48.00 432.00
+48.00 445.00
+48.00 445.00
+48.00 445.00
+-1.00 -1.00
+2 2
+15 (21 in the LI +
+ 0
+48.00 448.00
+48.00 461.00
+48.00 461.00
+48.00 461.00
+-1.00 -1.00
+2 2
+26 and its header is 22 bytes
+ 0
+48.00 464.00
+48.00 477.00
+48.00 477.00
+48.00 477.00
+-1.00 -1.00
+2 2
+28 its total length is 22 bytes
+ 0
+48.00 480.00
+48.00 493.00
+48.00 493.00
+48.00 493.00
+-1.00 -1.00
+2 2
+18 TPDU was received;
+ 0
+176.00 416.00
+176.00 429.00
+176.00 429.00
+176.00 429.00
+-1.00 -1.00
+2 2
+17 TPDU is a CR TPDU
+ 0
+368.00 432.00
+368.00 445.00
+368.00 445.00
+368.00 445.00
+-1.00 -1.00
+2 2
+22 indicates a TPDU event
+ 3
+160.00 304.00
+112.00 304.00
+112.00 256.00
+160.00 256.00
+-1.00 -1.00
+5 0
+0
+ 3
+160.00 304.00
+155.00 301.00
+157.00 304.00
+155.00 307.00
+160.00 304.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 352.00
+256.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+256.00 352.00
+253.00 357.00
+256.00 355.00
+259.00 357.00
+256.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+80.00 352.00
+48.00 352.00
+48.00 400.00
+-1.00 -1.00
+5 0
+0
+ 3
+80.00 352.00
+75.00 349.00
+77.00 352.00
+75.00 355.00
+80.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 384.00
+416.00 416.00
+-1.00 -1.00
+5 0
+0
+ 3
+416.00 384.00
+413.00 389.00
+416.00 387.00
+419.00 389.00
+416.00 384.00
+-1.00 -1.00
+5 0
+0
+ 3
+192.00 352.00
+192.00 336.00
+288.00 336.00
+288.00 352.00
+192.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+80.00 352.00
+80.00 336.00
+192.00 336.00
+192.00 352.00
+80.00 352.00
+-1.00 -1.00
+5 0
+0
+ 3
+80.00 368.00
+80.00 352.00
+208.00 352.00
+208.00 368.00
+80.00 368.00
+-1.00 -1.00
+5 0
+0
+ 3
+400.00 384.00
+400.00 368.00
+448.00 368.00
+448.00 384.00
+400.00 384.00
+-1.00 -1.00
+5 0
+0
+ 0
+96.00 288.00
+96.00 303.00
+96.00 303.00
+96.00 303.00
+-1.00 -1.00
+1 2
+66 +16: 0x02 0x00 0x07 0xc0 20: 0x01 0x08 0x00 0x00
+ 0
+96.00 304.00
+96.00 319.00
+96.00 319.00
+96.00 319.00
+-1.00 -1.00
+1 2
+66 + 8: 0x06 0x74 0x70 0x70 12: 0x69 0x6e 0x67 0xc2
+ 0
+96.00 320.00
+96.00 335.00
+96.00 335.00
+96.00 335.00
+-1.00 -1.00
+1 2
+66 + 0: 0x15 0xe0 0x00 0x00 4: 0x00 0x03 0x00 0xc1
+ 0
+96.00 336.00
+96.00 351.00
+96.00 351.00
+96.00 351.00
+-1.00 -1.00
+1 2
+56 HDRLEN: 21+1 CR_TPDU_type cdt 0(0x0) dref 0x0
+ 0
+96.00 352.00
+96.00 367.00
+96.00 367.00
+96.00 367.00
+-1.00 -1.00
+1 2
+18 INPUT total len 22
+ 0
+96.00 368.00
+96.00 383.00
+96.00 383.00
+96.00 383.00
+-1.00 -1.00
+1 2
+60 1a: Ref 22 arg 14(0xe), @ 91990 : 0000.435125 tpdu
+ -1
diff --git a/share/doc/iso/wisc/figs/tppt.nr b/share/doc/iso/wisc/figs/tppt.nr
new file mode 100644
index 0000000..a6dcd18
--- /dev/null
+++ b/share/doc/iso/wisc/figs/tppt.nr
@@ -0,0 +1,296 @@
+.(z
+.br
+.nr g1 3456u
+.nr g2 2736u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D's 16u'\D't 1u'
+.sp -1
+.sp 1296u
+\h'2592u'\D'l 432u 0u'
+.sp -1
+\D's -1u'\D't 7u'
+.sp -1
+\h'864u'\D'l 1728u 0u'
+.sp -1
+.sp -144u
+\h'2880u'\D'l 144u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "event # : time since 1st event
+.sp -864u
+\h'1728u'\&\*(g9
+.sp |\n(g8u
+\D't 1u'
+.sp -1
+.sp -720u
+\h'2160u'\D'l 0u -144u'
+.sp -1
+\h'2160u'\D'l -27u -45u'\D'l 27u 18u'\D'l 27u -18u'\D'l -27u 45u'
+.sp -1
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "@ 91990 : 0000.435125
+.sp 144u
+\h'1872u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "1a: Ref 22 arg 14(0xe)
+.sp 2304u
+\h'288u'\&\*(g9
+.sp |\n(g8u
+.sp 432u
+\h'2160u'\D'l 0u -144u'\D'l 576u 0u'
+.sp -1
+\h'2160u'\D'l -27u -45u'\D'l 27u 18u'\D'l 27u -18u'\D'l -27u 45u'
+.sp -1
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "cdt 0(0x0) dref 0x0
+.sp 144u
+\h'2592u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "CR_TPDU_type
+.sp 144u
+\h'1728u'\&\*(g9
+.sp |\n(g8u
+\h'1584u'\D'l 0u 144u'\D'l 864u 0u'\D'l 0u -144u'\D'l -864u 0u'
+.sp -1
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "HDRLEN: 21+1
+.sp 144u
+\h'432u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "INPUT total len 22
+\h'432u'\&\*(g9
+.sp |\n(g8u
+.sp -144u
+\h'288u'\D'l 0u 288u'\D'l 1296u 0u'\D'l 0u -288u'\D'l -1296u 0u'
+.sp -1
+.sp 288u
+\h'432u'\D'l 0u 576u'\D'l 2880u 0u'\D'l 0u -576u'\D'l -2880u 0u'
+.sp -1
+.sp -144u
+\h'288u'\D'l -288u 0u'\D'l 0u -432u'
+.sp -1
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "tpdu
+.sp -288u
+\h'3024u'\&\*(g9
+.sp |\n(g8u
+.sp -432u
+\h'2880u'\D'l 0u 144u'\D'l 576u 0u'\D'l 0u -144u'\D'l -576u 0u'
+.sp -1
+\h'3168u'\D'l -27u -45u'\D'l 27u 18u'\D'l 27u -18u'\D'l -27u 45u'
+.sp -1
+.sp -288u
+\h'3168u'\D'l 0u 288u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 ", TPDU type
+.sp 1728u
+\h'1152u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'
+.sp -1
+.sp 1008u
+\h'864u'\D'l 432u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "TPDU size
+.sp 1008u
+\h'2736u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp 1008u
+\h'2304u'\D'l 288u 0u'
+.sp -1
+.sp -720u
+\h'1728u'\D'l 864u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "called transport selector
+.sp 576u
+\h'2736u'\&\*(g9
+.sp |\n(g8u
+\D's 16u'
+.sp -1
+.sp 576u
+\h'2304u'\D'l 288u 0u'
+.sp -1
+.sp -576u
+\h'864u'\D'l 720u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "calling transport selector
+.sp 432u
+\h'2736u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'\D't 7u'
+.sp -1
+.sp 432u
+\h'2304u'\D'l 288u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "dst-ref, src-ref
+.sp 144u
+\h'1008u'\&\*(g9
+.sp |\n(g8u
+\D's 20u'\D't 1u'
+.sp -1
+.sp 144u
+\h'576u'\D'l 288u 0u'
+.sp -1
+.sp -864u
+\h'1440u'\D'l 1152u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "LI
+.sp 720u
+\h'1008u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'
+.sp -1
+.sp 720u
+\h'576u'\D'l 288u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "1 for the LI)
+.sp -1584u
+\h'576u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "(21 in the LI +
+.sp -1584u
+\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "and its header is 22 bytes
+.sp -1728u
+\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "its total length is 22 bytes
+.sp -1872u
+\h'720u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "TPDU was received;
+.sp -1872u
+\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "indicates a TPDU event
+.sp -1728u
+\h'2592u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'
+.sp -1
+.sp -432u
+\h'432u'\D'l -432u 0u'\D'l 0u 432u'\D'l 432u 0u'
+.sp -1
+\h'432u'\D'l -45u 27u'\D'l 18u -27u'\D'l -18u -27u'\D'l 45u 27u'
+.sp -1
+.sp -576u
+\h'288u'\D'l -45u 27u'\D'l 18u -27u'\D'l -18u -27u'\D'l 45u 27u'
+.sp -1
+\D't 3u'
+.sp -1
+.sp 288u
+\h'2592u'\D'l 288u 0u'
+.sp -1
+.sp 1008u
+\h'576u'\D'l 288u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "class and options
+\h'1008u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "+ 0: 0x15 0xe0 0x00 0x00 4: 0x00 0x03 0x00 0xc1
+.sp -1008u
+\h'576u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "+ 8: 0x06 0x74 0x70 0x70 12: 0x69 0x6e 0xc7 0xc2
+.sp -864u
+\h'576u'\&\*(g9
+.sp |\n(g8u
+.ft R
+.ps 12
+.nr g8 \n(.d
+.ds g9 "+12: 0x02 0x00 0x07 0xc0 20: 0x01 0x08 0x00 0x00
+.sp -720u
+\h'576u'\&\*(g9
+.sp |\n(g8u
+\D't 1u'
+.sp -1
+.sp -1584u
+\h'1872u'\D'l 0u -144u'\D'l 1008u 0u'\D'l 0u 144u'\D'l -1008u 0u'
+.sp -1
+.ft I
+.ps 10
+.nr g8 \n(.d
+.ds g9 "this is a CR TPDU
+.sp 144u
+\h'2736u'\&\*(g9
+.sp |\n(g8u
+.sp 2160u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG\fR: Output of tppt(8)
+.)z
diff --git a/share/doc/iso/wisc/figs/trans_flow.grn b/share/doc/iso/wisc/figs/trans_flow.grn
new file mode 100644
index 0000000..4a45d91
--- /dev/null
+++ b/share/doc/iso/wisc/figs/trans_flow.grn
@@ -0,0 +1,20 @@
+.(z
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.4
+narrow 1
+medium 3
+thick 7
+pointscale off
+file trans_flow.gsrc
+.GE
+.ce
+\fB Figure \n+(FG\fR: Control flow (solid) and data flow (broken)
+.ce
+among the parts of the transport implementation.
+.)z
diff --git a/share/doc/iso/wisc/figs/trans_flow.gsrc b/share/doc/iso/wisc/figs/trans_flow.gsrc
new file mode 100644
index 0000000..1b96523
--- /dev/null
+++ b/share/doc/iso/wisc/figs/trans_flow.gsrc
@@ -0,0 +1,567 @@
+gremlinfile
+0 448.00 587.00
+3
+448.00 201.00
+450.69 206.18
+450.55 202.57
+453.83 201.07
+448.00 201.00
+-1.00 -1.00
+6 0
+0
+ 3
+549.00 260.00
+447.00 201.00
+-1.00 -1.00
+6 0
+0
+ 3
+582.00 238.00
+585.82 233.60
+582.52 235.05
+579.91 232.55
+582.00 238.00
+-1.00 -1.00
+6 0
+0
+ 3
+585.00 148.00
+583.00 239.00
+-1.00 -1.00
+6 0
+0
+ 3
+423.00 376.00
+428.82 375.64
+425.47 374.30
+425.42 370.70
+423.00 376.00
+-1.00 -1.00
+6 0
+0
+ 3
+542.00 322.00
+422.00 375.00
+-1.00 -1.00
+6 0
+0
+ 3
+393.00 246.00
+389.63 250.76
+392.77 248.99
+395.61 251.22
+393.00 246.00
+-1.00 -1.00
+6 0
+0
+ 3
+390.00 364.00
+392.00 244.00
+-1.00 -1.00
+6 0
+0
+ 3
+476.00 655.00
+481.66 653.59
+478.12 652.88
+477.41 649.34
+476.00 655.00
+-1.00 -1.00
+6 0
+0
+ 3
+540.00 577.00
+476.00 656.00
+-1.00 -1.00
+6 0
+0
+ 3
+138.00 209.00
+139.96 203.51
+137.41 206.06
+134.08 204.69
+138.00 209.00
+-1.00 -1.00
+6 0
+0
+ 3
+137.00 209.00
+124.00 149.00
+-1.00 -1.00
+6 0
+0
+ 3
+378.00 242.00
+375.00 247.00
+378.00 245.00
+381.00 247.00
+378.00 242.00
+-1.00 -1.00
+1 0
+0
+ 3
+376.00 364.00
+378.00 239.00
+-1.00 -1.00
+1 0
+0
+ 3
+441.00 215.00
+443.50 220.27
+443.50 216.66
+446.82 215.28
+441.00 215.00
+-1.00 -1.00
+1 0
+0
+ 3
+541.00 269.00
+438.00 214.00
+-1.00 -1.00
+1 0
+0
+ 3
+600.00 240.00
+603.25 235.16
+600.15 237.00
+597.25 234.86
+600.00 240.00
+-1.00 -1.00
+1 0
+0
+ 3
+599.00 242.00
+605.00 148.00
+-1.00 -1.00
+1 0
+0
+ 3
+497.00 138.00
+491.22 137.21
+494.24 139.18
+493.59 142.73
+497.00 138.00
+-1.00 -1.00
+1 0
+0
+ 3
+441.00 165.00
+495.00 138.00
+-1.00 -1.00
+1 0
+0
+ 3
+423.00 231.00
+423.07 236.83
+424.57 233.55
+428.18 233.69
+423.00 231.00
+-1.00 -1.00
+1 0
+0
+ 3
+563.00 479.00
+421.00 231.00
+-1.00 -1.00
+1 0
+0
+ 3
+433.00 394.00
+438.82 394.29
+435.65 392.59
+436.00 389.00
+433.00 394.00
+-1.00 -1.00
+1 0
+0
+ 3
+554.00 335.00
+434.00 391.00
+-1.00 -1.00
+1 0
+0
+ 3
+176.00 317.00
+178.50 322.27
+178.50 318.66
+181.82 317.28
+176.00 317.00
+-1.00 -1.00
+1 0
+0
+ 3
+325.00 413.00
+175.00 315.00
+-1.00 -1.00
+1 0
+0
+ 3
+343.00 378.00
+340.62 372.67
+340.54 376.28
+337.18 377.59
+343.00 378.00
+-1.00 -1.00
+1 0
+0
+ 3
+199.00 285.00
+342.00 377.00
+-1.00 -1.00
+1 0
+0
+ 3
+344.00 226.00
+338.60 228.20
+342.20 228.40
+343.40 231.80
+344.00 226.00
+-1.00 -1.00
+1 0
+0
+ 3
+197.00 458.00
+342.00 226.00
+-1.00 -1.00
+1 0
+0
+ 3
+523.00 513.00
+520.80 507.60
+520.60 511.20
+517.20 512.40
+523.00 513.00
+-1.00 -1.00
+1 0
+0
+ 3
+424.00 461.00
+522.00 512.00
+-1.00 -1.00
+1 0
+0
+ 3
+553.00 583.00
+547.45 584.79
+551.02 585.26
+551.97 588.74
+553.00 583.00
+-1.00 -1.00
+1 0
+0
+ 3
+491.00 657.00
+553.00 583.00
+-1.00 -1.00
+1 0
+0
+ 3
+235.00 559.00
+235.65 564.79
+236.82 561.38
+240.42 561.15
+235.00 559.00
+-1.00 -1.00
+1 0
+0
+ 3
+304.00 656.00
+233.00 556.00
+-1.00 -1.00
+1 0
+0
+ 4
+383.00 420.00
+354.00 467.00
+383.00 364.77
+383.00 475.23
+438.23 420.00
+327.77 420.00
+-1.00 -1.00
+5 0
+0
+ 4
+189.00 515.00
+160.00 562.00
+189.00 459.77
+189.00 570.23
+244.23 515.00
+133.77 515.00
+-1.00 -1.00
+5 0
+0
+ 4
+577.00 532.00
+548.00 579.00
+577.00 476.77
+577.00 587.23
+632.23 532.00
+521.77 532.00
+-1.00 -1.00
+5 0
+0
+ 4
+592.00 296.00
+563.00 343.00
+592.00 240.77
+592.00 351.23
+647.23 296.00
+536.77 296.00
+-1.00 -1.00
+5 0
+0
+ 4
+388.00 185.00
+359.00 232.00
+388.00 129.77
+388.00 240.23
+443.23 185.00
+332.77 185.00
+-1.00 -1.00
+5 0
+0
+ 4
+145.00 265.00
+116.00 312.00
+145.00 209.77
+145.00 320.23
+200.23 265.00
+89.77 265.00
+-1.00 -1.00
+5 0
+0
+ 3
+282.00 708.00
+282.00 658.00
+500.00 658.00
+500.00 708.00
+282.00 708.00
+-1.00 -1.00
+5 0
+0
+ 3
+384.00 660.00
+384.00 484.00
+-1.00 -1.00
+6 0
+0
+ 3
+384.00 484.00
+381.00 489.00
+384.00 487.00
+387.00 489.00
+384.00 484.00
+-1.00 -1.00
+6 0
+0
+ 3
+552.00 484.00
+408.00 236.00
+-1.00 -1.00
+6 0
+0
+ 3
+408.00 236.00
+408.28 241.82
+409.66 238.50
+413.27 238.50
+408.00 236.00
+-1.00 -1.00
+6 0
+0
+ 3
+432.00 452.00
+528.00 500.00
+-1.00 -1.00
+6 0
+0
+ 3
+528.00 500.00
+525.50 494.73
+525.50 498.34
+522.18 499.72
+528.00 500.00
+-1.00 -1.00
+6 0
+0
+ 3
+240.00 484.00
+328.00 444.00
+-1.00 -1.00
+6 0
+0
+ 3
+240.00 484.00
+245.69 485.26
+242.85 483.05
+243.79 479.57
+240.00 484.00
+-1.00 -1.00
+6 0
+0
+ 0
+320.00 668.00
+320.00 681.00
+320.00 681.00
+320.00 681.00
+-1.00 -1.00
+3 2
+11 SOCKET CODE
+ 0
+512.00 116.00
+512.00 129.00
+512.00 129.00
+512.00 129.00
+-1.00 -1.00
+3 2
+13 NETWORK LEVEL
+ 3
+496.00 148.00
+496.00 100.00
+704.00 100.00
+704.00 148.00
+496.00 148.00
+-1.00 -1.00
+6 0
+0
+ 0
+64.00 116.00
+64.00 129.00
+64.00 129.00
+64.00 129.00
+-1.00 -1.00
+3 2
+5 CLOCK
+ 3
+48.00 148.00
+48.00 100.00
+160.00 100.00
+160.00 148.00
+48.00 148.00
+-1.00 -1.00
+6 0
+0
+ 0
+160.00 500.00
+160.00 513.00
+160.00 513.00
+160.00 513.00
+-1.00 -1.00
+3 2
+4 SEND
+ 0
+544.00 524.00
+544.00 537.00
+544.00 537.00
+544.00 537.00
+-1.00 -1.00
+3 2
+4 RECV
+ 0
+352.00 421.00
+352.00 434.00
+352.00 434.00
+352.00 434.00
+-1.00 -1.00
+3 2
+6 DRIVER
+ 0
+105.00 264.00
+105.00 277.00
+105.00 277.00
+105.00 277.00
+-1.00 -1.00
+3 2
+6 TIMERS
+ 0
+560.00 276.00
+560.00 289.00
+560.00 289.00
+560.00 289.00
+-1.00 -1.00
+3 2
+5 INPUT
+ 0
+349.00 181.00
+349.00 194.00
+349.00 194.00
+349.00 194.00
+-1.00 -1.00
+3 2
+6 OUTPUT
+ 3
+192.00 292.00
+336.00 388.00
+-1.00 -1.00
+6 0
+0
+ 3
+336.00 388.00
+334.59 382.34
+333.88 385.88
+330.34 386.59
+336.00 388.00
+-1.00 -1.00
+6 0
+0
+ 3
+328.00 404.00
+184.00 308.00
+-1.00 -1.00
+6 0
+0
+ 3
+184.00 308.00
+187.13 312.92
+186.68 309.34
+189.81 307.55
+184.00 308.00
+-1.00 -1.00
+6 0
+0
+ 3
+208.00 460.00
+352.00 236.00
+-1.00 -1.00
+6 0
+0
+ 3
+352.00 236.00
+347.08 239.13
+350.66 238.68
+352.45 241.81
+352.00 236.00
+-1.00 -1.00
+6 0
+0
+ 3
+432.00 148.00
+496.00 116.00
+-1.00 -1.00
+6 0
+0
+ 3
+496.00 116.00
+491.00 113.00
+493.00 116.00
+491.00 119.00
+496.00 116.00
+-1.00 -1.00
+6 0
+0
+ 3
+224.00 564.00
+288.00 660.00
+-1.00 -1.00
+6 0
+0
+ 3
+288.00 660.00
+288.45 654.19
+286.66 657.32
+283.08 656.87
+288.00 660.00
+-1.00 -1.00
+6 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/trans_flow.nr b/share/doc/iso/wisc/figs/trans_flow.nr
new file mode 100644
index 0000000..2b8061c
--- /dev/null
+++ b/share/doc/iso/wisc/figs/trans_flow.nr
@@ -0,0 +1,274 @@
+.(z
+.br
+.nr g1 3456u
+.nr g2 3202u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+.sp 253u
+\h'1265u'\D'l 2u 30u'\D'l -9u -16u'\D'l -19u 2u'\D'l 26u -16u'
+.sp -1
+.sp 505u
+\h'928u'\D'l 337u -505u'
+.sp -1
+.sp 2359u
+\h'2361u'\D'l -27u 16u'\D'l 11u -16u'\D'l -11u -15u'\D'l 27u 15u'
+.sp -1
+.sp -168u
+\h'2023u'\D'l 338u 168u'
+.sp -1
+.sp -464u
+\h'1602u'\D'l -26u -16u'\D'l 19u 2u'\D'l 9u -16u'\D'l -2u 30u'
+.sp -1
+.sp -1180u
+\h'843u'\D'l 759u 1180u'
+.sp -1
+.sp 801u
+\h'717u'\D'l 16u -26u'\D'l -2u 19u'\D'l 16u 9u'\D'l -30u -2u'
+.sp -1
+.sp -506u
+\h'1475u'\D'l -758u 506u'
+.sp -1
+.sp 84u
+\h'1518u'\D'l -8u 30u'\D'l -4u -18u'\D'l -18u -4u'\D'l 30u -8u'
+.sp -1
+.sp 506u
+\h'759u'\D'l 759u -506u'
+.sp -1
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "OUTPUT
+.sp 585u
+\h'1586u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "INPUT
+.sp 85u
+\h'2698u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "TIMERS
+.sp 148u
+\h'301u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "DRIVER
+.sp -679u
+\h'1602u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "RECV
+.sp -1221u
+\h'2613u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "SEND
+.sp -1096u
+\h'590u'\&\*(g9
+.sp |\n(g8u
+.sp 759u
+\D'l 0u 253u'\D'l 590u 0u'\D'l 0u -253u'\D'l -590u 0u'
+.sp -1
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "CLOCK
+.sp 168u
+\h'85u'\&\*(g9
+.sp |\n(g8u
+\h'2361u'\D'l 0u 253u'\D'l 1095u 0u'\D'l 0u -253u'\D'l -1095u 0u'
+.sp -1
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "NETWORK LEVEL
+.sp 168u
+\h'2445u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 10
+.nr g8 \n(.d
+.ds g9 "SOCKET CODE
+.sp -2739u
+\h'1433u'\&\*(g9
+.sp |\n(g8u
+.sp -1770u
+\h'1012u'\D'l 30u -7u'\D'l -15u 12u'\D'l 5u 18u'\D'l -20u -23u'
+.sp -1
+\h'1012u'\D'l 463u 210u'
+.sp -1
+.sp -85u
+\h'2529u'\D'l -13u 28u'\D'l 0u -19u'\D'l -18u -7u'\D'l 31u -2u'
+.sp -1
+.sp 253u
+\h'2023u'\D'l 506u -253u'
+.sp -1
+.sp 1138u
+\h'1897u'\D'l 1u -30u'\D'l 8u 17u'\D'l 19u 0u'\D'l -28u 13u'
+.sp -1
+.sp -1306u
+\h'2656u'\D'l -759u 1306u'
+.sp -1
+\h'1771u'\D'l -16u -27u'\D'l 16u 11u'\D'l 15u -11u'\D'l -15u 27u'
+.sp -1
+.sp -926u
+\h'1771u'\D'l 0u 926u'
+.sp -1
+\D't 1u'
+.sp -1
+.sp -253u
+\h'1233u'\D'l 0u 263u'\D'l 1149u 0u'\D'l 0u -263u'\D'l -1149u 0u'
+.sp -1
+.sp 2332u
+\h'220u'\D'c 581u'
+.sp -1
+.sp 422u
+\h'1501u'\D'c 581u'
+.sp -1
+.sp -585u
+\h'2575u'\D'c 581u'
+.sp -1
+.sp -1242u
+\h'2496u'\D'c 581u'
+.sp -1
+.sp 89u
+\h'452u'\D'c 581u'
+.sp -1
+.sp 500u
+\h'1474u'\D'c 581u'
+.sp -1
+\D's 4u'
+.sp -1
+.sp -1242u
+\h'1349u'\D'l -374u 526u'
+.sp -1
+.sp 511u
+\h'986u'\D'l 3u -31u'\D'l 6u 18u'\D'l 19u 1u'\D'l -28u 12u'
+.sp -1
+.sp -517u
+\h'2334u'\D'l 327u 390u'
+.sp -1
+.sp 390u
+\h'2661u'\D'l -29u -9u'\D'l 18u -3u'\D'l 5u -18u'\D'l 6u 30u'
+.sp -1
+.sp 642u
+\h'1981u'\D'l 517u -268u'
+.sp -1
+.sp -273u
+\h'2503u'\D'l -12u 27u'\D'l -1u -18u'\D'l -18u -6u'\D'l 31u -3u'
+.sp -1
+.sp 289u
+\h'785u'\D'l 764u 1222u'
+.sp -1
+.sp 1222u
+\h'1560u'\D'l -29u -12u'\D'l 19u -1u'\D'l 7u -18u'\D'l 3u 31u'
+.sp -1
+.sp -311u
+\h'796u'\D'l 753u -485u'
+.sp -1
+.sp -490u
+\h'1555u'\D'l -13u 28u'\D'l 0u -19u'\D'l -18u -7u'\D'l 31u -2u'
+.sp -1
+.sp -184u
+\h'1460u'\D'l -791u 516u'
+.sp -1
+.sp 506u
+\h'675u'\D'l 13u -28u'\D'l 0u 19u'\D'l 17u 7u'\D'l -30u 2u'
+.sp -1
+.sp -95u
+\h'2666u'\D'l -632u -295u'
+.sp -1
+.sp -311u
+\h'2029u'\D'l 30u -2u'\D'l -16u 9u'\D'l 1u 19u'\D'l -15u -26u'
+.sp -1
+.sp -448u
+\h'2714u'\D'l -749u 1307u'
+.sp -1
+.sp 1307u
+\h'1976u'\D'l 0u -31u'\D'l 8u 17u'\D'l 19u -1u'\D'l -27u 15u'
+.sp -1
+.sp 347u
+\h'2071u'\D'l 284u 143u'
+.sp -1
+.sp 143u
+\h'2366u'\D'l -31u 4u'\D'l 16u -11u'\D'l -3u -18u'\D'l 18u 25u'
+.sp -1
+.sp -548u
+\h'2903u'\D'l 32u 495u'
+.sp -1
+.sp 10u
+\h'2908u'\D'l 18u 26u'\D'l -17u -10u'\D'l -15u 11u'\D'l 14u -27u'
+.sp -1
+.sp -153u
+\h'2598u'\D'l -543u 290u'
+.sp -1
+.sp 285u
+\h'2071u'\D'l 13u -28u'\D'l 0u 19u'\D'l 17u 7u'\D'l -30u 2u'
+.sp -1
+.sp -785u
+\h'1728u'\D'l 11u 658u'
+.sp -1
+.sp 643u
+\h'1739u'\D'l -16u -27u'\D'l 16u 11u'\D'l 16u -11u'\D'l -16u 27u'
+.sp -1
+\D's -1u'\D't 3u'
+.sp -1
+.sp 174u
+\h'469u'\D'l -68u 316u'
+.sp -1
+\h'475u'\D'l 10u 28u'\D'l -14u -13u'\D'l -17u 7u'\D'l 21u -22u'
+.sp -1
+.sp -1938u
+\h'2592u'\D'l -337u -416u'
+.sp -1
+.sp -411u
+\h'2255u'\D'l 30u 7u'\D'l -19u 4u'\D'l -3u 19u'\D'l -8u -30u'
+.sp -1
+.sp 1532u
+\h'1802u'\D'l 11u 632u'
+.sp -1
+.sp 622u
+\h'1818u'\D'l -18u -25u'\D'l 17u 9u'\D'l 15u -12u'\D'l -14u 28u'
+.sp -1
+.sp -401u
+\h'2603u'\D'l -632u -279u'
+.sp -1
+.sp -284u
+\h'1976u'\D'l 31u 2u'\D'l -18u 7u'\D'l 0u 19u'\D'l -13u -28u'
+.sp -1
+.sp 1201u
+\h'2829u'\D'l -10u -480u'
+.sp -1
+.sp -474u
+\h'2814u'\D'l 20u 23u'\D'l -18u -8u'\D'l -13u 13u'\D'l 11u -28u'
+.sp -1
+.sp -116u
+\h'2640u'\D'l -538u 311u'
+.sp -1
+.sp 311u
+\h'2108u'\D'l 14u -28u'\D'l -1u 19u'\D'l 17u 8u'\D'l -30u 1u'
+.sp -1
+.sp 532u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fB Figure \n+(FG\fR: Control flow (solid) and data flow (broken)
+.ce
+among the parts of the transport implementation.
+.)z
diff --git a/share/doc/iso/wisc/figs/unix_ipc.grn b/share/doc/iso/wisc/figs/unix_ipc.grn
new file mode 100644
index 0000000..7c06d27
--- /dev/null
+++ b/share/doc/iso/wisc/figs/unix_ipc.grn
@@ -0,0 +1,18 @@
+.(z L
+.GS C
+width 6.0
+high 7.0
+1 8
+2 10
+3 12
+4 14
+sc 0.3
+narrow 1
+medium 3
+thick 7
+pointscale off
+file unix_ipc.gsrc
+.GE
+.ce
+\fBFigure \n+(FG\fR: IPC in 4.2 Unix
+.)z
diff --git a/share/doc/iso/wisc/figs/unix_ipc.gsrc b/share/doc/iso/wisc/figs/unix_ipc.gsrc
new file mode 100644
index 0000000..cafe972
--- /dev/null
+++ b/share/doc/iso/wisc/figs/unix_ipc.gsrc
@@ -0,0 +1,1041 @@
+gremlinfile
+0 384.00 408.00
+3
+384.00 380.00
+387.00 375.00
+384.00 377.00
+381.00 375.00
+384.00 380.00
+-1.00 -1.00
+6 0
+0
+ 3
+392.00 612.00
+608.00 612.00
+-1.00 -1.00
+6 0
+0
+ 3
+248.00 612.00
+32.00 612.00
+-1.00 -1.00
+6 0
+0
+ 4
+176.00 692.00
+176.00 740.00
+176.00 644.00
+176.00 740.00
+224.00 692.00
+128.00 692.00
+-1.00 -1.00
+6 0
+0
+ 4
+160.00 532.00
+160.00 580.00
+160.00 484.00
+160.00 580.00
+208.00 532.00
+112.00 532.00
+-1.00 -1.00
+6 0
+0
+ 4
+528.00 340.00
+528.00 388.00
+528.00 292.00
+528.00 388.00
+576.00 340.00
+480.00 340.00
+-1.00 -1.00
+6 0
+0
+ 4
+544.00 124.00
+544.00 188.00
+544.00 60.00
+544.00 188.00
+608.00 124.00
+480.00 124.00
+-1.00 -1.00
+6 0
+0
+ 4
+320.00 604.00
+320.00 676.00
+320.00 532.00
+320.00 676.00
+392.00 604.00
+248.00 604.00
+-1.00 -1.00
+6 0
+0
+ 0
+144.00 676.00
+144.00 692.00
+144.00 692.00
+144.00 692.00
+-1.00 -1.00
+3 3
+7 program
+ 0
+152.00 700.00
+152.00 716.00
+152.00 716.00
+152.00 716.00
+-1.00 -1.00
+3 3
+4 user
+ 0
+288.00 628.00
+288.00 644.00
+288.00 644.00
+288.00 644.00
+-1.00 -1.00
+3 3
+9 C library
+ 0
+280.00 572.00
+280.00 588.00
+280.00 588.00
+280.00 588.00
+-1.00 -1.00
+3 3
+12 system calls
+ 0
+144.00 532.00
+144.00 548.00
+144.00 548.00
+144.00 548.00
+-1.00 -1.00
+3 3
+5 clock
+ 0
+512.00 132.00
+512.00 148.00
+512.00 148.00
+512.00 148.00
+-1.00 -1.00
+3 3
+8 network
+ 0
+512.00 108.00
+512.00 124.00
+512.00 124.00
+512.00 124.00
+-1.00 -1.00
+3 3
+9 interface
+ 0
+512.00 84.00
+512.00 100.00
+512.00 100.00
+512.00 100.00
+-1.00 -1.00
+3 3
+7 drivers
+ 0
+32.00 628.00
+32.00 644.00
+32.00 644.00
+32.00 644.00
+-1.00 -1.00
+2 3
+4 user
+ 0
+32.00 580.00
+32.00 596.00
+32.00 596.00
+32.00 596.00
+-1.00 -1.00
+2 3
+6 kernel
+ 3
+248.00 612.00
+392.00 612.00
+-1.00 -1.00
+1 0
+0
+ 0
+488.00 692.00
+488.00 708.00
+488.00 708.00
+488.00 708.00
+-1.00 -1.00
+3 3
+7 network
+ 0
+480.00 668.00
+480.00 684.00
+480.00 684.00
+480.00 684.00
+-1.00 -1.00
+3 3
+10 management
+ 4
+528.00 692.00
+528.00 756.00
+528.00 628.00
+528.00 756.00
+592.00 692.00
+464.00 692.00
+-1.00 -1.00
+6 0
+0
+ 4
+528.00 500.00
+528.00 548.00
+528.00 452.00
+528.00 548.00
+576.00 500.00
+480.00 500.00
+-1.00 -1.00
+6 0
+0
+ 0
+504.00 508.00
+504.00 524.00
+504.00 524.00
+504.00 524.00
+-1.00 -1.00
+3 3
+7 routing
+ 0
+504.00 484.00
+504.00 500.00
+504.00 500.00
+504.00 500.00
+-1.00 -1.00
+3 3
+6 tables
+ 4
+320.00 436.00
+320.00 516.00
+320.00 356.00
+320.00 516.00
+400.00 436.00
+240.00 436.00
+-1.00 -1.00
+6 0
+0
+ 0
+288.00 420.00
+288.00 436.00
+288.00 436.00
+288.00 436.00
+-1.00 -1.00
+3 3
+7 sockets
+ 3
+80.00 356.00
+80.00 100.00
+208.00 100.00
+208.00 356.00
+80.00 356.00
+-1.00 -1.00
+6 0
+0
+ 0
+112.00 292.00
+112.00 308.00
+112.00 308.00
+112.00 308.00
+-1.00 -1.00
+3 3
+9 transport
+ 0
+112.00 244.00
+112.00 260.00
+112.00 260.00
+112.00 260.00
+-1.00 -1.00
+3 3
+8 protocol
+ 0
+112.00 188.00
+112.00 204.00
+112.00 204.00
+112.00 204.00
+-1.00 -1.00
+3 3
+6 switch
+ 4
+448.00 228.00
+448.00 276.00
+448.00 180.00
+448.00 276.00
+496.00 228.00
+400.00 228.00
+-1.00 -1.00
+6 0
+0
+ 4
+288.00 292.00
+288.00 340.00
+288.00 244.00
+288.00 340.00
+336.00 292.00
+240.00 292.00
+-1.00 -1.00
+6 0
+0
+ 4
+288.00 132.00
+288.00 180.00
+288.00 84.00
+288.00 180.00
+336.00 132.00
+240.00 132.00
+-1.00 -1.00
+6 0
+0
+ 0
+504.00 340.00
+504.00 356.00
+504.00 356.00
+504.00 356.00
+-1.00 -1.00
+3 3
+5 mbufs
+ 0
+264.00 292.00
+264.00 308.00
+264.00 308.00
+264.00 308.00
+-1.00 -1.00
+3 3
+7 proto 1
+ 0
+264.00 124.00
+264.00 140.00
+264.00 140.00
+264.00 140.00
+-1.00 -1.00
+3 3
+7 proto n
+ 3
+208.00 660.00
+256.00 636.00
+-1.00 -1.00
+6 0
+0
+ 3
+480.00 652.00
+384.00 628.00
+-1.00 -1.00
+6 0
+0
+ 3
+384.00 564.00
+480.00 516.00
+-1.00 -1.00
+6 0
+0
+ 3
+304.00 532.00
+304.00 516.00
+-1.00 -1.00
+6 0
+0
+ 5
+240.00 420.00
+176.00 420.00
+144.00 404.00
+128.00 372.00
+128.00 356.00
+-1.00 -1.00
+6 0
+0
+ 3
+216.00 292.00
+240.00 292.00
+-1.00 -1.00
+6 0
+0
+ 3
+216.00 132.00
+240.00 132.00
+-1.00 -1.00
+6 0
+0
+ 3
+336.00 292.00
+368.00 292.00
+-1.00 -1.00
+6 0
+0
+ 3
+336.00 132.00
+368.00 132.00
+-1.00 -1.00
+6 0
+0
+ 3
+368.00 132.00
+368.00 324.00
+-1.00 -1.00
+6 0
+0
+ 3
+512.00 180.00
+488.00 196.00
+-1.00 -1.00
+6 0
+0
+ 5
+368.00 324.00
+368.00 356.00
+368.00 364.00
+-1.00 -1.00
+6 0
+0
+ 3
+328.00 268.00
+400.00 244.00
+-1.00 -1.00
+6 0
+0
+ 3
+328.00 164.00
+400.00 204.00
+-1.00 -1.00
+6 0
+0
+ 3
+208.00 660.00
+213.81 660.45
+210.68 658.66
+211.13 655.08
+208.00 660.00
+-1.00 -1.00
+6 0
+0
+ 3
+256.00 636.00
+250.34 637.41
+253.88 638.12
+254.59 641.66
+256.00 636.00
+-1.00 -1.00
+6 0
+0
+ 3
+384.00 628.00
+389.00 631.00
+387.00 628.00
+389.00 625.00
+384.00 628.00
+-1.00 -1.00
+6 0
+0
+ 3
+480.00 652.00
+475.00 649.00
+477.00 652.00
+475.00 655.00
+480.00 652.00
+-1.00 -1.00
+6 0
+0
+ 3
+384.00 564.00
+389.81 564.45
+386.68 562.66
+387.13 559.08
+384.00 564.00
+-1.00 -1.00
+6 0
+0
+ 3
+480.00 516.00
+475.00 513.00
+477.00 516.00
+475.00 519.00
+480.00 516.00
+-1.00 -1.00
+6 0
+0
+ 3
+304.00 532.00
+307.00 527.00
+304.00 529.00
+301.00 527.00
+304.00 532.00
+-1.00 -1.00
+6 0
+0
+ 3
+304.00 516.00
+301.00 521.00
+304.00 519.00
+307.00 521.00
+304.00 516.00
+-1.00 -1.00
+6 0
+0
+ 3
+128.00 356.00
+125.00 361.00
+128.00 359.00
+131.00 361.00
+128.00 356.00
+-1.00 -1.00
+6 0
+0
+ 3
+240.00 292.00
+235.00 289.00
+237.00 292.00
+235.00 295.00
+240.00 292.00
+-1.00 -1.00
+6 0
+0
+ 3
+240.00 132.00
+235.00 129.00
+237.00 132.00
+235.00 135.00
+240.00 132.00
+-1.00 -1.00
+6 0
+0
+ 3
+368.00 364.00
+371.00 359.00
+368.00 361.00
+365.00 359.00
+368.00 364.00
+-1.00 -1.00
+6 0
+0
+ 3
+328.00 268.00
+333.81 268.45
+330.68 266.66
+331.13 263.08
+328.00 268.00
+-1.00 -1.00
+6 0
+0
+ 3
+400.00 244.00
+395.00 241.00
+397.00 244.00
+395.00 247.00
+400.00 244.00
+-1.00 -1.00
+6 0
+0
+ 3
+400.00 204.00
+398.59 198.34
+397.88 201.88
+394.34 202.59
+400.00 204.00
+-1.00 -1.00
+6 0
+0
+ 3
+328.00 164.00
+331.13 168.92
+330.68 165.34
+333.81 163.55
+328.00 164.00
+-1.00 -1.00
+6 0
+0
+ 3
+488.00 196.00
+493.66 194.59
+490.12 193.88
+489.41 190.34
+488.00 196.00
+-1.00 -1.00
+6 0
+0
+ 3
+512.00 180.00
+506.34 181.41
+509.88 182.12
+510.59 185.66
+512.00 180.00
+-1.00 -1.00
+6 0
+0
+ 0
+416.00 228.00
+416.00 244.00
+416.00 244.00
+416.00 244.00
+-1.00 -1.00
+3 3
+6 DoD IP
+ 0
+368.00 20.00
+368.00 36.00
+368.00 36.00
+368.00 36.00
+-1.00 -1.00
+2 3
+12 control flow
+ 0
+144.00 20.00
+144.00 36.00
+144.00 36.00
+144.00 36.00
+-1.00 -1.00
+2 3
+9 data flow
+ 3
+304.00 36.00
+352.00 36.00
+-1.00 -1.00
+6 0
+0
+ 3
+80.00 36.00
+128.00 36.00
+-1.00 -1.00
+1 0
+0
+ 3
+224.00 676.00
+264.00 652.00
+-1.00 -1.00
+1 0
+0
+ 3
+464.00 676.00
+384.00 644.00
+-1.00 -1.00
+1 0
+0
+ 3
+392.00 580.00
+488.00 532.00
+-1.00 -1.00
+1 0
+0
+ 3
+200.00 500.00
+248.00 468.00
+-1.00 -1.00
+1 0
+0
+ 5
+120.00 500.00
+48.00 372.00
+48.00 276.00
+48.00 196.00
+48.00 84.00
+64.00 68.00
+144.00 68.00
+480.00 68.00
+496.00 76.00
+-1.00 -1.00
+1 0
+0
+ 3
+152.00 68.00
+152.00 100.00
+-1.00 -1.00
+1 0
+0
+ 3
+432.00 52.00
+432.00 180.00
+-1.00 -1.00
+1 0
+0
+ 3
+216.00 116.00
+240.00 116.00
+-1.00 -1.00
+1 0
+0
+ 3
+216.00 276.00
+240.00 276.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 276.00
+384.00 276.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 116.00
+384.00 116.00
+-1.00 -1.00
+1 0
+0
+ 3
+384.00 116.00
+384.00 388.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 148.00
+408.00 188.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 252.00
+400.00 228.00
+-1.00 -1.00
+1 0
+0
+ 3
+472.00 180.00
+488.00 164.00
+-1.00 -1.00
+1 0
+0
+ 3
+504.00 300.00
+480.00 268.00
+-1.00 -1.00
+1 0
+0
+ 3
+480.00 340.00
+328.00 324.00
+-1.00 -1.00
+1 0
+0
+ 3
+400.00 420.00
+488.00 372.00
+-1.00 -1.00
+1 0
+0
+ 3
+544.00 292.00
+544.00 188.00
+-1.00 -1.00
+1 0
+0
+ 3
+528.00 452.00
+528.00 388.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 532.00
+336.00 516.00
+-1.00 -1.00
+1 0
+0
+ 3
+224.00 676.00
+229.66 674.59
+226.12 673.88
+225.41 670.34
+224.00 676.00
+-1.00 -1.00
+1 0
+0
+ 3
+264.00 652.00
+258.34 653.41
+261.88 654.12
+262.59 657.66
+264.00 652.00
+-1.00 -1.00
+1 0
+0
+ 3
+384.00 644.00
+387.13 648.92
+386.68 645.34
+389.81 643.55
+384.00 644.00
+-1.00 -1.00
+1 0
+0
+ 3
+464.00 676.00
+462.59 670.34
+461.88 673.88
+458.34 674.59
+464.00 676.00
+-1.00 -1.00
+1 0
+0
+ 3
+392.00 580.00
+397.81 580.45
+394.68 578.66
+395.13 575.08
+392.00 580.00
+-1.00 -1.00
+1 0
+0
+ 3
+488.00 532.00
+482.19 531.55
+485.32 533.34
+484.87 536.92
+488.00 532.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 532.00
+339.00 527.00
+336.00 529.00
+333.00 527.00
+336.00 532.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 516.00
+333.00 521.00
+336.00 519.00
+339.00 521.00
+336.00 516.00
+-1.00 -1.00
+1 0
+0
+ 3
+248.00 468.00
+242.19 467.55
+245.32 469.34
+244.87 472.92
+248.00 468.00
+-1.00 -1.00
+1 0
+0
+ 3
+152.00 100.00
+152.00 84.00
+-1.00 -1.00
+1 0
+0
+ 3
+240.00 116.00
+235.00 113.00
+237.00 116.00
+235.00 119.00
+240.00 116.00
+-1.00 -1.00
+1 0
+0
+ 3
+152.00 100.00
+155.00 95.00
+152.00 97.00
+149.00 95.00
+152.00 100.00
+-1.00 -1.00
+1 0
+0
+ 3
+432.00 180.00
+435.00 175.00
+432.00 177.00
+429.00 175.00
+432.00 180.00
+-1.00 -1.00
+1 0
+0
+ 3
+472.00 180.00
+477.66 178.59
+474.12 177.88
+473.41 174.34
+472.00 180.00
+-1.00 -1.00
+1 0
+0
+ 3
+496.00 156.00
+490.34 157.41
+493.88 158.12
+494.59 161.66
+496.00 156.00
+-1.00 -1.00
+1 0
+0
+ 3
+496.00 76.00
+494.59 70.34
+493.88 73.88
+490.34 74.59
+496.00 76.00
+-1.00 -1.00
+1 0
+0
+ 3
+408.00 188.00
+406.59 182.34
+405.88 185.88
+402.34 186.59
+408.00 188.00
+-1.00 -1.00
+1 0
+0
+ 3
+336.00 148.00
+339.13 152.92
+338.68 149.34
+341.81 147.55
+336.00 148.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 252.00
+333.81 252.45
+330.68 250.66
+331.13 247.08
+328.00 252.00
+-1.00 -1.00
+1 0
+0
+ 3
+400.00 228.00
+395.00 225.00
+397.00 228.00
+395.00 231.00
+400.00 228.00
+-1.00 -1.00
+1 0
+0
+ 3
+544.00 292.00
+547.00 287.00
+544.00 289.00
+541.00 287.00
+544.00 292.00
+-1.00 -1.00
+1 0
+0
+ 3
+504.00 300.00
+502.59 294.34
+501.88 297.88
+498.34 298.59
+504.00 300.00
+-1.00 -1.00
+1 0
+0
+ 3
+480.00 268.00
+481.41 273.66
+482.12 270.12
+485.66 269.41
+480.00 268.00
+-1.00 -1.00
+1 0
+0
+ 3
+400.00 420.00
+405.81 420.45
+402.68 418.66
+403.13 415.08
+400.00 420.00
+-1.00 -1.00
+1 0
+0
+ 3
+488.00 372.00
+482.19 371.55
+485.32 373.34
+484.87 376.92
+488.00 372.00
+-1.00 -1.00
+1 0
+0
+ 3
+528.00 452.00
+531.00 447.00
+528.00 449.00
+525.00 447.00
+528.00 452.00
+-1.00 -1.00
+1 0
+0
+ 3
+528.00 388.00
+525.00 393.00
+528.00 391.00
+531.00 393.00
+528.00 388.00
+-1.00 -1.00
+1 0
+0
+ 3
+480.00 340.00
+475.00 337.00
+477.00 340.00
+475.00 343.00
+480.00 340.00
+-1.00 -1.00
+1 0
+0
+ 3
+328.00 324.00
+333.00 327.00
+331.00 324.00
+333.00 321.00
+328.00 324.00
+-1.00 -1.00
+1 0
+0
+ 5
+240.00 404.00
+176.00 404.00
+160.00 388.00
+152.00 372.00
+152.00 356.00
+-1.00 -1.00
+1 0
+0
+ 3
+152.00 356.00
+149.00 361.00
+152.00 359.00
+155.00 361.00
+152.00 356.00
+-1.00 -1.00
+1 0
+0
+ 5
+480.00 324.00
+448.00 308.00
+416.00 292.00
+400.00 276.00
+336.00 212.00
+320.00 196.00
+312.00 180.00
+-1.00 -1.00
+1 0
+0
+ 3
+312.00 180.00
+311.55 185.81
+313.34 182.68
+316.92 183.13
+312.00 180.00
+-1.00 -1.00
+1 0
+0
+ 3
+480.00 324.00
+476.87 319.08
+477.32 322.66
+474.19 324.45
+480.00 324.00
+-1.00 -1.00
+1 0
+0
+ -1
diff --git a/share/doc/iso/wisc/figs/unix_ipc.nr b/share/doc/iso/wisc/figs/unix_ipc.nr
new file mode 100644
index 0000000..de24796
--- /dev/null
+++ b/share/doc/iso/wisc/figs/unix_ipc.nr
@@ -0,0 +1,499 @@
+.(z L
+.br
+.nr g1 3155u
+.nr g2 4031u
+.GS C
+.nr g3 \n(.f
+.nr g4 \n(.s
+\0
+.sp -1
+\D's 4u'\D't 1u'
+.sp -1
+.sp 2366u
+\h'2454u'\D'l -17u 27u'\D'l 2u -20u'\D'l -17u -10u'\D'l 32u 3u'
+.sp -1
+.sp 789u
+\h'1534u'\D'l -3u -32u'\D'l 10u 17u'\D'l 20u -2u'\D'l -27u 17u'
+.sp -1
+.sp -789u
+\h'2454u'\D'g -175u 88u -176u 87u -87u 88u -351u 350u -87u 88u -44u 88u'
+.sp -1
+.sp -175u
+\h'657u'\D'l -16u -28u'\D'l 16u 11u'\D'l 17u -11u'\D'l -17u 28u'
+.sp -1
+.sp -263u
+\h'1139u'\D'g -350u 0u -88u 87u -44u 88u 0u 88u'
+.sp -1
+.sp 438u
+\h'1621u'\D'l 28u -17u'\D'l -11u 17u'\D'l 11u 16u'\D'l -28u -16u'
+.sp -1
+.sp -88u
+\h'2454u'\D'l -27u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 27u 16u'
+.sp -1
+.sp -263u
+\h'2717u'\D'l -16u -27u'\D'l 16u 11u'\D'l 16u -11u'\D'l -16u 27u'
+.sp -1
+.sp -350u
+\h'2717u'\D'l 16u 27u'\D'l -16u -11u'\D'l -16u 11u'\D'l 16u -27u'
+.sp -1
+.sp 438u
+\h'2498u'\D'l -32u 2u'\D'l 17u -9u'\D'l -2u -20u'\D'l 17u 27u'
+.sp -1
+.sp -263u
+\h'2016u'\D'l 32u -2u'\D'l -18u 9u'\D'l 3u 20u'\D'l -17u -27u'
+.sp -1
+.sp 833u
+\h'2454u'\D'l 8u -31u'\D'l 4u 19u'\D'l 19u 4u'\D'l -31u 8u'
+.sp -1
+.sp -176u
+\h'2586u'\D'l -8u 31u'\D'l -4u -19u'\D'l -19u -4u'\D'l 31u -8u'
+.sp -1
+.sp 44u
+\h'2805u'\D'l 16u 28u'\D'l -16u -11u'\D'l -17u 11u'\D'l 17u -28u'
+.sp -1
+.sp 351u
+\h'2016u'\D'l -28u 16u'\D'l 11u -16u'\D'l -11u -17u'\D'l 28u 17u'
+.sp -1
+.sp -132u
+\h'1621u'\D'l 32u -2u'\D'l -17u 10u'\D'l 3u 19u'\D'l -18u -27u'
+.sp -1
+.sp 570u
+\h'1665u'\D'l 17u -27u'\D'l -2u 20u'\D'l 17u 10u'\D'l -32u -3u'
+.sp -1
+.sp -219u
+\h'2060u'\D'l -8u 31u'\D'l -4u -19u'\D'l -19u -4u'\D'l 31u -8u'
+.sp -1
+.sp 614u
+\h'2542u'\D'l -8u 31u'\D'l -4u -20u'\D'l -19u -4u'\D'l 31u -7u'
+.sp -1
+.sp -439u
+\h'2542u'\D'l -31u -7u'\D'l 19u -4u'\D'l 4u -20u'\D'l 8u 31u'
+.sp -1
+.sp -131u
+\h'2410u'\D'l 31u 8u'\D'l -19u 3u'\D'l -4u 20u'\D'l -8u -31u'
+.sp -1
+\h'2191u'\D'l 17u 27u'\D'l -17u -11u'\D'l -16u 11u'\D'l 16u -27u'
+.sp -1
+.sp 438u
+\h'657u'\D'l 17u 27u'\D'l -17u -10u'\D'l -16u 10u'\D'l 16u -27u'
+.sp -1
+.sp -88u
+\h'1139u'\D'l -27u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 27u 16u'
+.sp -1
+.sp 88u
+\h'657u'\D'l 0u 88u'
+.sp -1
+.sp -2016u
+\h'1183u'\D'l -32u 3u'\D'l 17u -10u'\D'l -2u -20u'\D'l 17u 27u'
+.sp -1
+.sp -262u
+\h'1665u'\D'l -16u -27u'\D'l 16u 11u'\D'l 17u -11u'\D'l -17u 27u'
+.sp -1
+.sp -88u
+\h'1665u'\D'l 17u 28u'\D'l -17u -11u'\D'l -16u 11u'\D'l 16u -28u'
+.sp -1
+\h'2498u'\D'l -32u 3u'\D'l 17u -10u'\D'l -2u -19u'\D'l 17u 26u'
+.sp -1
+.sp -262u
+\h'1972u'\D'l 32u -3u'\D'l -17u 10u'\D'l 2u 19u'\D'l -17u -26u'
+.sp -1
+.sp -526u
+\h'2366u'\D'l -7u 31u'\D'l -4u -20u'\D'l -20u -4u'\D'l 31u -7u'
+.sp -1
+.sp 175u
+\h'1928u'\D'l 17u -27u'\D'l -2u 20u'\D'l 17u 9u'\D'l -32u -2u'
+.sp -1
+.sp -44u
+\h'1271u'\D'l -31u -8u'\D'l 19u -4u'\D'l 4u -19u'\D'l 8u 31u'
+.sp -1
+.sp -131u
+\h'1052u'\D'l 31u 7u'\D'l -20u 4u'\D'l -4u 20u'\D'l -7u -31u'
+.sp -1
+.sp 788u
+\h'1665u'\D'l 0u 88u'
+.sp -1
+.sp 438u
+\h'2717u'\D'l 0u 350u'
+.sp -1
+.sp 876u
+\h'2805u'\D'l 0u 570u'
+.sp -1
+.sp -701u
+\h'2016u'\D'l 482u 263u'
+.sp -1
+.sp 438u
+\h'2454u'\D'l -833u 88u'
+.sp -1
+.sp 219u
+\h'2586u'\D'l -132u 176u'
+.sp -1
+.sp 658u
+\h'2410u'\D'l 88u 87u'
+.sp -1
+.sp -395u
+\h'1621u'\D'l 395u 132u'
+.sp -1
+.sp 570u
+\h'1665u'\D'l 395u -219u'
+.sp -1
+.sp 175u
+\h'1928u'\D'l 0u -1490u'
+.sp -1
+\h'1665u'\D'l 263u 0u'
+.sp -1
+.sp -876u
+\h'1665u'\D'l 263u 0u'
+.sp -1
+\h'1008u'\D'l 131u 0u'
+.sp -1
+.sp 876u
+\h'1008u'\D'l 131u 0u'
+.sp -1
+.sp 351u
+\h'2191u'\D'l 0u -701u'
+.sp -1
+.sp -88u
+\h'657u'\D'l 0u -175u'
+.sp -1
+.sp -2366u
+\h'482u'\D'g -395u 701u 0u 526u 0u 438u 0u 614u 88u 87u 438u 0u 1841u 0u 88u -43u'
+.sp -1
+\h'920u'\D'l 263u 175u'
+.sp -1
+.sp -437u
+\h'1972u'\D'l 526u 262u'
+.sp -1
+.sp -526u
+\h'2366u'\D'l -438u 175u'
+.sp -1
+\h'1052u'\D'l 219u 131u'
+.sp -1
+.sp 3505u
+\h'263u'\D'l 263u 0u'
+.sp -1
+\D's -1u'\D't 3u'
+.sp -1
+\h'1490u'\D'l 263u 0u'
+.sp -1
+.ft I
+.ps 12
+.nr g8 \n(.d
+.ds g9 "data flow
+.sp 87u
+\h'613u'\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 12
+.nr g8 \n(.d
+.ds g9 "control flow
+.sp 87u
+\h'1840u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "DoD IP
+.sp -1052u
+\h'2103u'\&\*(g9
+.sp |\n(g8u
+.sp -789u
+\h'2629u'\D'l -31u -8u'\D'l 20u -4u'\D'l 4u -19u'\D'l 7u 31u'
+.sp -1
+.sp -88u
+\h'2498u'\D'l 31u 8u'\D'l -19u 4u'\D'l -4u 19u'\D'l -8u -31u'
+.sp -1
+.sp 175u
+\h'1621u'\D'l 18u -26u'\D'l -3u 19u'\D'l 17u 10u'\D'l -32u -3u'
+.sp -1
+.sp -219u
+\h'2016u'\D'l -8u 31u'\D'l -4u -19u'\D'l -19u -4u'\D'l 31u -8u'
+.sp -1
+.sp -219u
+\h'2016u'\D'l -28u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 28u 16u'
+.sp -1
+.sp -131u
+\h'1621u'\D'l 32u -3u'\D'l -17u 10u'\D'l 3u 20u'\D'l -18u -27u'
+.sp -1
+.sp -526u
+\h'1840u'\D'l 17u 27u'\D'l -17u -11u'\D'l -16u 11u'\D'l 16u -27u'
+.sp -1
+.sp 1271u
+\h'1139u'\D'l -27u 16u'\D'l 11u -16u'\D'l -11u -17u'\D'l 27u 17u'
+.sp -1
+.sp -877u
+\h'1139u'\D'l -27u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 27u 16u'
+.sp -1
+.sp -350u
+\h'526u'\D'l -17u -28u'\D'l 17u 11u'\D'l 16u -11u'\D'l -16u 28u'
+.sp -1
+.sp -876u
+\h'1490u'\D'l -17u -27u'\D'l 17u 11u'\D'l 16u -11u'\D'l -16u 27u'
+.sp -1
+.sp -88u
+\h'1490u'\D'l 16u 28u'\D'l -16u -11u'\D'l -17u 11u'\D'l 17u -28u'
+.sp -1
+.sp 88u
+\h'2454u'\D'l -27u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 27u 16u'
+.sp -1
+.sp -263u
+\h'1928u'\D'l 32u -2u'\D'l -17u 9u'\D'l 2u 20u'\D'l -17u -27u'
+.sp -1
+.sp -482u
+\h'2454u'\D'l -27u 17u'\D'l 11u -17u'\D'l -11u -16u'\D'l 27u 16u'
+.sp -1
+.sp 132u
+\h'1928u'\D'l 28u -17u'\D'l -11u 17u'\D'l 11u 16u'\D'l -28u -16u'
+.sp -1
+.sp -44u
+\h'1227u'\D'l -31u -8u'\D'l 19u -4u'\D'l 4u -19u'\D'l 8u 31u'
+.sp -1
+.sp -132u
+\h'964u'\D'l 32u -2u'\D'l -17u 10u'\D'l 2u 19u'\D'l -17u -27u'
+.sp -1
+.sp 2716u
+\h'1621u'\D'l 395u -219u'
+.sp -1
+.sp -569u
+\h'1621u'\D'l 395u 131u'
+.sp -1
+.sp -307u
+\h'1840u'\D'g 0u -175u 0u -44u'
+.sp -1
+.sp 789u
+\h'2629u'\D'l -131u -88u'
+.sp -1
+.sp 263u
+\h'1840u'\D'l 0u -1052u'
+.sp -1
+\h'1665u'\D'l 175u 0u'
+.sp -1
+.sp -877u
+\h'1665u'\D'l 175u 0u'
+.sp -1
+.sp 877u
+\h'1008u'\D'l 131u 0u'
+.sp -1
+.sp -877u
+\h'1008u'\D'l 131u 0u'
+.sp -1
+.sp -701u
+\h'1139u'\D'g -350u 0u -176u 88u -87u 175u 0u 88u'
+.sp -1
+.sp -613u
+\h'1490u'\D'l 0u 88u'
+.sp -1
+.sp -175u
+\h'1928u'\D'l 526u 263u'
+.sp -1
+.sp -482u
+\h'2454u'\D'l -526u 132u'
+.sp -1
+.sp -44u
+\h'964u'\D'l 263u 132u'
+.sp -1
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "proto n
+.sp 2936u
+\h'1271u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "proto 1
+.sp 2015u
+\h'1271u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "mbufs
+.sp 1752u
+\h'2586u'\&\*(g9
+.sp |\n(g8u
+.sp 2892u
+\h'1139u'\D'c 525u'
+.sp -1
+.sp -877u
+\h'1139u'\D'c 525u'
+.sp -1
+.sp 351u
+\h'2016u'\D'c 525u'
+.sp -1
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "switch
+.sp 219u
+\h'438u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "protocol
+.sp -88u
+\h'438u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "transport
+.sp -351u
+\h'438u'\&\*(g9
+.sp |\n(g8u
+.sp -701u
+\h'263u'\D'l 0u 1402u'\D'l 701u 0u'\D'l 0u -1402u'\D'l -701u 0u'
+.sp -1
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "sockets
+.sp -351u
+\h'1402u'\&\*(g9
+.sp |\n(g8u
+.sp -439u
+\h'1139u'\D'c 876u'
+.sp -1
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "tables
+.sp -263u
+\h'2586u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "routing
+.sp -394u
+\h'2586u'\&\*(g9
+.sp |\n(g8u
+.sp -350u
+\h'2454u'\D'c 525u'
+.sp -1
+.sp -1051u
+\h'2366u'\D'c 701u'
+.sp -1
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "management
+.sp 131u
+\h'2454u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "network
+\h'2498u'\&\*(g9
+.sp |\n(g8u
+\D's 4u'\D't 1u'
+.sp -1
+.sp 438u
+\h'1183u'\D'l 789u 0u'
+.sp -1
+.ft I
+.ps 12
+.nr g8 \n(.d
+.ds g9 "kernel
+.sp 176u
+\&\*(g9
+.sp |\n(g8u
+.ft I
+.ps 12
+.nr g8 \n(.d
+.ds g9 "user
+.sp -87u
+\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "drivers
+.sp 2892u
+\h'2629u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "interface
+.sp 2760u
+\h'2629u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "network
+.sp 2629u
+\h'2629u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "clock
+.sp 438u
+\h'613u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "system calls
+.sp 219u
+\h'1358u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "C library
+.sp -87u
+\h'1402u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "user
+.sp -482u
+\h'657u'\&\*(g9
+.sp |\n(g8u
+.ft B
+.ps 12
+.nr g8 \n(.d
+.ds g9 "program
+.sp -350u
+\h'613u'\&\*(g9
+.sp |\n(g8u
+\D's -1u'\D't 3u'
+.sp -1
+.sp 44u
+\h'1183u'\D'c 788u'
+.sp -1
+.sp 2629u
+\h'2454u'\D'c 701u'
+.sp -1
+.sp -1184u
+\h'2454u'\D'c 525u'
+.sp -1
+.sp -1051u
+\h'438u'\D'c 525u'
+.sp -1
+.sp -876u
+\h'526u'\D'c 525u'
+.sp -1
+.sp 438u
+\h'1183u'\D'l -1183u 0u'
+.sp -1
+\h'1972u'\D'l 1183u 0u'
+.sp -1
+.sp 1270u
+\h'1928u'\D'l 17u 28u'\D'l -17u -11u'\D'l -16u 11u'\D'l 16u -28u'
+.sp -1
+.sp 1972u
+\D't 3u'\D's -1u'
+.br
+.ft \n(g3
+.ps \n(g4
+.GE
+.ce
+\fBFigure \n+(FG\fR: IPC in 4.2 Unix
+.)z
diff --git a/share/doc/iso/wisc/intro.nr b/share/doc/iso/wisc/intro.nr
new file mode 100644
index 0000000..5e03515
--- /dev/null
+++ b/share/doc/iso/wisc/intro.nr
@@ -0,0 +1,76 @@
+.NC "Introduction"
+.sh 1 "Introduction"
+.pp
+This document describes the design and implementation of the ISO
+transport and network layers written for the ACIS Operating System,
+the IBM ACIS port of Berkeley 4.3 Unix\**
+.(f
+\** Unix is a registered trademark of AT&T.
+.)f
+for the IBM RT PC,
+hereafter called AOS.
+Collectively, this work is called the Wisconsin ARGO kernel.
+The ARGO kernel supports
+the connection-oriented ISO transport service (COTS), the
+ISO connectionless network service (CLNS)
+and a
+connection-oriented network service (CONS).
+The COTS is provided by the ISO transport protocol TP,
+ISO 8073 Revised.
+The CLNS is provided by the connectionless network protocol,
+ISO 8473.
+The CONS is provided by the X.25 protocols.
+The ARGO implementation of the CONS is not a complete
+ISO CONS, but contains enough of the CONS to support
+the COTS and the CLNS (in the latter case, the CONS can be
+viewed as a subnetwork service).
+.pp
+The purposes of this document are
+.ip "1) "
+to describe the transport service and the software interface
+between the user and provider of this service,
+.ip "2) "
+to describe the network service and the software interface it
+provides,
+.ip "3) "
+to describe the design of the software which provides
+these services, and
+.ip "4) "
+to provide a guide for readers who are perusing or maintaining
+the ARGO kernel source code.
+.pp
+It is assumed that the reader is familiar with the \fBC\fR
+programming language,
+with Unix conventions, and with the ISO specifications listed in Appendix A.
+.sh 1 "Organization"
+.pp
+This document is composed of several chapters.
+Chapter One contains this introduction. Chapter Two presents a
+definition of terms and phrases used throughout the document.
+Chapter Three describes the transport service interface, which is
+the interface between the transport protocol implementation software and the transport user software.
+Chapter Four describes the network service interface, and the interface
+above and below the network layer.
+Chapter Five explains the format of an OSI address.
+Chapter Six describes
+the architecture of the interprocess communication support in the
+kernel, which to a large degree mandates
+the design of a protocol implementation for a 4.3 Unix kernel.
+Chapter Seven describes the design of this transport
+protocol implementation,
+including descriptions of implementation options.
+Chapter Eight describes the design of the network layer implementation.
+Chapter Nine describes the way errors are handled in the system.
+Chapter Ten summarizes the methods used for
+testing and debugging the ARGO kernel.
+Appendix A is a list of the applicable ISO standards.
+.\" The manual pages relevant to the transport and network layers
+.\" are included as Appendix B.
+.pp
+Several conventions are followed in this document.
+All procedure names and system call names are followed
+by a pair of parentheses, for example,
+\fIread()\fR.
+References to manual pages consist of the name of the
+manual page, followed by the section in which
+the man page is found: \fIread(2)\fR.
diff --git a/share/doc/iso/wisc/ipc.nr b/share/doc/iso/wisc/ipc.nr
new file mode 100644
index 0000000..9f9d962
--- /dev/null
+++ b/share/doc/iso/wisc/ipc.nr
@@ -0,0 +1,372 @@
+.NC "The Design of Unix IPC"
+.sh 1 "General"
+.pp
+The ARGO implementation of
+TP and CLNP was designed to fit into the AOS
+kernel
+as easily as possible.
+All the standard protocol hooks are used.
+To understand the design, it is useful to have
+read
+Leffler, Joy, and Fabry:
+\*(lq4.2 BSD Networking Implementation Notes\*(rq July 1983.
+This section describes the
+design of the IPC support in the AOS kernel.
+.sh 1 "Functional Unit Overview"
+.pp
+The
+AOS
+kernel
+is a monolithic program of considerable size and complexity.
+The code can be separated into parts of distinct function,
+but there are no kernel processes per se.
+The kernel code is either executed on behalf of a user
+process, in which case the kernel was entered by a system call,
+or it is executed on behalf of a hardware or software interrupt.
+The following sections describe briefly the major functional units
+of the kernel.
+.\" FIGURE
+.so figs/func_units.nr
+.CF
+shows the arrangement of these kernel units and
+their interactions.
+.sh 2 "The file system."
+.pp
+.sh 2 "Virtual memory support."
+.pp
+This includes protection, swapping, paging, and
+text sharing.
+.sh 2 "Blocked device drivers (disks, tapes)."
+.pp
+All these drivers share some minor functional units,
+such as buffer management and bus support
+for the various types of busses on the machine.
+.sh 2 "Interprocess communication (IPC)."
+.pp
+This includes
+support for various protocols,
+buffer management, and a standard interface for inter-protocol
+communication.
+.sh 2 "Network interface drivers."
+.pp
+These drivers are closely tied to the IPC support.
+They use the IPC's buffer management unit rather
+than the buffers used by the blocked device drivers.
+The interface between these drivers and the rest of the kernel
+differs from the interface used by the blocked devices.
+.sh 2 "Tty driver"
+.pp
+This is terminal support, including the user interface
+and the device drivers.
+.sh 2 "System call interface."
+.pp
+This handles signals, traps, and system calls.
+.sh 2 "Clock."
+.pp
+The clock is used in various forms by many
+other units.
+.sh 2 "User process support (the rest)."
+.pp
+This includes support for accounting, process creation,
+control, scheduling, and destruction.
+.pp
+.sh 2 "IPC"
+.pp
+The major functional unit that supports IPC
+can be divided into the following smaller functional
+units.
+.sh 3 "Buffer management."
+.pp
+All protocols share a pool of buffers called \fImbufs\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct mbuf {
+.T&
+l l l l.
++struct mbuf+*m_next;+/* next buffer in chain */
++u_long+m_off;+/* offset of data */
++short+m_len;+/* amount of data */
++short+m_type;+/* mbuf type (0 == free) */
++u_char+m_dat[MLEN];+/* data storage */
++struct mbuf+*m_act;+/* link in 2-d structure */
+};
+.TE
+\fR
+.)b
+.pp
+There are two forms of mbufs - small ones and large ones.
+Small ones are 128 octets in
+AOS
+and 256 octets
+in the ARGO release. Small mbufs are copied by byte-to-byte
+copies.
+The data in these mbufs are kept in the character
+array field \fIm_dat\fR in the mbuf structure
+itself.
+For this type of mbuf, the field \fIm_off\fR is positive,
+and is the offset to the beginning of the data from
+the beginning of the mbuf structure itself.
+Large mbufs, called \fIclusters\fR, are page-sized
+and page-aligned.
+They may be \*(lqcopied\*(rq by multiply mapping the pages they occupy.
+They consist of a page of memory plus a small mbuf structure
+whose fields are used
+to link clusters into chains, but whose \fIm_dat\fR array is
+not used.
+The \fIm_off\fR field of the structure
+is the offset (positive or negative) from the
+beginning of the mbuf structure to the beginning
+of the data page part of the cluster.
+In the case of clusters, the offset is always out of the
+bounds of the \fIm_dat\fR array and so it is always possible
+to tell from the \fIm_off\fR field whether an mbuf structure
+is part of a cluster or is a small mbuf.
+All mbufs permanently reside in memory.
+The mbuf management unit manages its own page table.
+The mbuf manager keeps limited statistics on the quantities and
+types of buffers in use.
+Mbufs are used for many purposes, and most of these purposes
+have a type associated with them.
+Some of the types that buffers may take are
+MT_FREE (not allocated), MT_DATA,
+MT_HEADER, MT_SOCKET (socket structure),
+MT_PCB (protocol control block),
+MT_RTABLE (routing tables),
+and
+MT_SOOPTS (arguments passed to \fIgetsockopt()\fR and
+\fIsetsockopt()\fR).
+Data are passed among functional units by means
+of queues, the contents of which are
+either chains of mbufs or groups of chains of mbufs.
+Mbufs are linked into chains with the \fIm_next\fR field.
+Chains of mbufs are linked into groups with the \fIm_act\fR
+field.
+The \fIm_act\fR field allows a protocol to retain packet
+boundaries in a queue of mbufs.
+.sh 3 "Routing."
+.pp
+Routing decisions in the kernel are made by the procedure \fIrtalloc()\fR.
+This procedure will scan the kernel routing tables (stored in mbufs)
+looking for a route. A route is represented by
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct rtentry {
+.T&
+l l l l.
++u_long+rt_hash;+/* to speed lookups */
++struct sockaddr+rt_dst;+/* key */
++struct sockaddr+rt_gateway;+/* value */
++short+rt_flags;+/* up/down?, host/net */
++short+rt_refcnt;+/* # held references */
++u_long+rt_use;+/* raw # packets forwarded */
++struct ifnet+*rt_ifp;+/* interface to use */
+};
+.TE
+\fR
+.)b
+When looking for a route, \fIrtalloc()\fR will first hash the entire destination
+address, and scan the routing tables looking for a complete route. If a route
+is not found, then \fIrtalloc()\fR will rescan the table looking for a route
+which matches the \fInetwork\fR portion of the address. If a route is still
+not found, then a default route is used (if present).
+.pp
+If a route is found, the entity which called \fIrtalloc()\fR can use information
+from the \fIrtentry\fR structure to dispatch the datagram. Specifically, the
+datagram is queued on the interface identified by the interface
+pointer \fIrt_ifp\fR.
+.sh 3 "Socket code."
+.pp
+This is the protocol-independent part of the IPC support.
+Each communication endpoint (which may or may not be associated
+with a connection) is represented by the following structure:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct socket {
+.T&
+l l l l.
++short+so_type;+/* type, e.g. SOCK_DGRAM */
++short+so_options;+/* from socket call */
++short+so_linger;+/* time to linger @ close */
++short+so_state;+/* internal state flags */
++caddr_t+so_pcb;+/* network layer pcb */
++struct protosw+*so_proto;+/* protocol handle */
++struct socket+*so_head;+/* ptr to accept socket */
++struct socket+*so_q0;+/* queue of partial connX */
++short+so_q0len;+/* # partials on so_q0 */
++struct socket+*so_q;+/* queue of incoming connX */
++short+so_qlen;+/* # connections on so_q */
++short+so_qlimit;+/* max # queued connX */
++struct sockbuf+{
+++short+sb_cc;+/* actual chars in buffer */
+++short+sb_hiwat;+/* max actual char count */
+++short+sb_mbcnt;+/* chars of mbufs used */
+++short+sb_mbmax;+/* max chars of mbufs to use */
+++short+sb_lowat;+/* low water mark (not used yet) */
+++short+sb_timeo;+/* timeout (not used) */
+++struct mbuf+*sb_mb;+/* the mbuf chain */
+++struct proc+*sb_sel;+/* process selecting */
+++short+sb_flags;+/* flags, see below */
++} so_rcv, so_snd;
++short+so_timeo;+/* connection timeout */
++u_short+so_error;+/* error affecting connX */
++short+so_oobmark;+/* oob mark (TCP only) */
++short+so_pgrp;+/* pgrp for signals */
+};
+.TE
+\fR
+.)b
+.pp
+The socket code maintains a pair of queues for each socket,
+\fIso_rcv\fR and \fIso_snd\fR.
+Each queue is associated with a count of the number of characters
+in the queue, the maximum number of characters allowed to be put
+in the queue, some status information (\fIsb_flags\fR), and
+several unused fields.
+For a send operation, data are copied from the user's address space
+into chains of mbufs.
+This is done by the socket module, which then calls the underlying
+transport protocol module to place the data
+on the send queue.
+This is generally done by
+appending to the chain beginning at \fIsb_mb\fR.
+The socket module copies data from the \fIso_rcv\fR queue
+to the user's address space to effect a receive operation.
+The underlying transport layer is expected to have put incoming
+data into \fIso_rcv\fR by calling procedures in this module.
+.in -5
+.sh 3 "Transport protocol management."
+.pp
+All protocols and address types must be \*(lqregistered\*(rq in a
+common way in order to use the IPC user interface.
+Each protocol must have an entry in a protocol switch table.
+Each entry takes the form:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct protosw {
+.T&
+l l l l.
++short+pr_type;+/* socket type used for */
++short+pr_family;+/* protocol family */
++short+pr_protocol;+/* protocol # from the database */
++short+pr_flags;+/* status information */
++++/* protocol-protocol hooks */
++int+(*pr_input)();+/* input (from below) */
++int+(*pr_output)();+/* output (from above) */
++int+(*pr_ctlinput)();+/* control input */
++int+(*pr_ctloutput)();+/* control output */
++++/* user-protocol hook */
++int+(*pr_usrreq)();+/* user request: see list below */
++++/* utility hooks */
++int+(*pr_init)();+/* initialization hook */
++int+(*pr_fasttimo)();+/* fast timeout (200ms) */
++int+(*pr_slowtimo)();+/* slow timeout (500ms) */
++int+(*pr_drain)();+/* free some space (not used) */
+}
+.TE
+\fR
+.)b
+.pp
+Associated with each protocol are the types of socket
+abstractions supported by the protocol (\fIpr_type\fR), the
+format of the addresses used by the protocol (\fIpr_family\fR),
+the routines to be called to perform
+a standard set of protocol functions (\fIpr_input\fR,...,\fIpr_drain\fR),
+and some status information (\fIpr_flags\fR).
+The socket structure's \fIso_state\fR field keeps such information as
+SS_ISCONNECTED (this socket has a peer),
+SS_ISCONNECTING (this socket is in the process of establishing
+a connection),
+SS_ISDISCONNECTING (this socket is in the process of being disconnected),
+SS_CANTSENDMORE (this socket is half-closed and cannot send),
+SS_CANTRCVMORE (this socket is half-closed and cannot receive).
+There are some flags that are specific to the TCP concept
+of out-of-band data.
+A flag SS_OOBAVAIL was added for the ARGO implementation, to support
+the TP concept of out-of-band data (expedited data).
+.sh 3 "Network Interface Drivers"
+.pp
+The drivers for the devices attaching a Unix machine to a network
+medium share a common interface to the protocol
+software.
+There is a common data structure for managing queues,
+not surprisingly, a chain of mbufs.
+There is a set of macros that are used to enqueue and
+dequeue mbuf chains at high priority.
+A driver
+delivers an indication to a protocol entity when
+an incoming packet has been placed on a queue by
+issuing a
+software
+interrupt.
+.sh 3 "Support for individual protocols."
+.pp
+Each protocol is written as a separate functional unit.
+Because all protocols share the clock and the mbuf pool, they
+are not entirely insulated from each other.
+The details of TP are described in a section that
+follows.
+.\"*****************************************************
+.\" FIGURE
+.so figs/unix_ipc.nr
+.pp
+.CF
+shows the arrangement of the IPC support.
+.pp
+The AOS
+IPC was designed for DoD Internet protocols, all of
+which run over DoD IP.
+The assumptions that DoD Internet is the domain
+and that DoD IP is the network layer
+appear in the code and data structures in numerous places.
+For example, it is assumed that addresses can be compared
+by a bitwise comparison of 4 octets.
+Another example is that the transport protocols all directly call
+IP routines.
+There are no hooks in the data structures through
+which the transport layer can choose a network level protocol.
+A third example is that the host's local addresses
+are stored in the network interface drivers and the drivers
+have only one address - an Internet address.
+A fourth example is that headers are assumed to
+fit in one small mbuf (112 bytes for data in AOS).
+A fifth example is this:
+It is assumed in many places that buffer space is managed
+in units of characters or octets.
+The user data are copied from user address space into the kernel mbufs
+amorphously
+by the socket code, a protocol-independent part of the kernel.
+This is fine for a stream protocol, but it means that a
+packet protocol, in order to \*(lqpacketize\*(rq the data,
+must perform a memory-to-memory copy
+that might have been avoided had the protocol layer done the original
+copy from user address space.
+Furthermore, protocols that count credit in terms of packets or
+buffers rather than characters do not work efficiently because
+the computation of buffer space is not in the protocol module,
+but rather it is in the socket code module.
+This list of examples is not complete.
+.pp
+To summarize, adding a new transport protocol to the kernel consists of
+adding entries to the tables in the protocol management
+unit,
+modifying the network interface driver(s) to recognize
+new network protocol identifiers,
+adding the
+new system calls to the kernel and to the user library,
+and
+adding code modules for each of the protocols,
+and correcting deficiencies in the socket code,
+where the assumptions made about the nature of
+transport protocols do not apply.
diff --git a/share/doc/iso/wisc/macros.nr b/share/doc/iso/wisc/macros.nr
new file mode 100644
index 0000000..a3624c9
--- /dev/null
+++ b/share/doc/iso/wisc/macros.nr
@@ -0,0 +1,50 @@
+.\"
+.\" IC -- initialize the chapter macros: set number register CN (the chapter
+.\" counter printed by NC) to 0 with an auto-increment step of 1.
+.de IC
+.nr CN 0 1
+..
+.\" NC "title" -- begin chapter: set page header (stepping CN via \n+(CN),
+.\" break page, .sh 0 "_" (presumably resets -me section numbers -- confirm),
+.\" print centered "CHAPTER n" + bold title at +2 points, add index entry.
+.de NC
+.he 'ARGO Kernel Programmer\'s Guide''Chapter \\n+(CN'
+.bp
+.sh 0 "_" 1 1 1 1 1 1
+.sz +2
+.(l C
+CHAPTER \\n(CN
+
+\fB\\$1\fR
+.)l
+.sp 1
+.(x
+Chapter \\n(CN \\$1
+.)x
+.sz -2
+..
+.\"
+.\" Figure conventions:
+.\" 1) .so the figure source file; the figure register FG is incremented
+.\"    there (presumably by the sourced figs/*.nr file -- not shown here)
+.\" 2) refer to the most recent figure in running text via .CF
+.\"
+.\" IF -- initialize the figure register: FG = 0, auto-increment step 1.
+.\"
+.de IF
+.nr FG 0 1
+..
+.\"
+.\" CF -- emit the text "Figure n", where n is the current value of FG.
+.\"
+.de CF
+Figure \\n(FG
+..
+.\"
+.\" $0 -- include section headings in the table of contents by recording
+.\" "Section <$2> <$1>" (presumably $1 = title, $2 = number -- confirm).
+.de $0
+.(x
+Section \\$2 \\$1
+.)x
+..
diff --git a/share/doc/iso/wisc/net_design.nr b/share/doc/iso/wisc/net_design.nr
new file mode 100644
index 0000000..d066f8e
--- /dev/null
+++ b/share/doc/iso/wisc/net_design.nr
@@ -0,0 +1,1139 @@
+.NC "The Design of the ARGO Network Layer"
+.sh 1 "Connectionless Network Layer"
+.pp
+The following sections describe the design of the ARGO
+connectionless network layer (CLNL).
+The connectionless network service is provided by several
+network-layer protocols: ES-IS (ISO 9542),
+CLNP (ISO 8473), and X.25 (ISO 8208).
+The protocol CLNP is the primary connectionless network layer
+protocol.
+It is supported by X.25 when X.25 is used as a subnetwork layer.
+X.25 can also be viewed as a link layer protocol in this context.
+The ES-IS protocol supports CLNP by providing the following functions:
+.ip \(bu 5
+automatic mapping of NSAP-addresses to SNPA addresses,
+.ip \(bu 5
+automatic configuration of networks of end systems and intermediate
+systems, and
+.ip \(bu 5
+redirection of network-layer traffic in response to
+configuration changes.
+.pp
+The rest of this chapter describes the design of
+CLNP, the design of ES-IS,
+and the design of the connection-oriented
+network layer, including the connection-oriented subnetwork service (X.25).
+.pp
+CLNP has two subsets defined: the Inactive Network Layer
+protocol subset and the Non-Segmenting protocol subset.
+The Inactive Network Layer subset is a null-function subset
+in which the CLNP is not needed, and the
+protocol consists of sending
+a 1-byte header containing the value zero.
+This "subset" is not supported in ARGO.
+.pp
+The Non-Segmenting protocol subset permits simplification of the DT NPDU
+header when it is known that segmentation of the DT NPDU is not required.
+ARGO supports this subset.
+When this subset is used,
+the segmentation part of the DT NPDU (data packet) header is not present,
+and the \fIdon't segment\fR bit is set in the
+fixed part of the header.
+This subset is chosen by setting the bit
+\fICLNP_NO_SEG\fR in the \fIflags\fR argument to \fIclnp_output()\fR.
+.pp
+Throughout the remainder of this
+document,
+the following definitions apply:
+.(b
+\(bu DT NPDU: data transfer NPDU.
+\(bu ER NPDU: error report NPDU.
+\(bu NPDU: either an ER or DT NPDU.
+.)b
+.sh 2 "DT NPDU Output"
+.pp
+A CLNP DT NPDU is transmitted by calling \fIclnp_output()\fR.
+.so figs/clnp_output.nr
+.\" FIGURE
+.CF
+outlines the sequence of steps taken by \fIclnp_output()\fR when
+transmitting an NPDU.
+The solid lines indicate normal flow of control. The
+dashed lines indicate possible error returns (with associated
+error code).
+.pp
+\fIClnp_output()\fR will automatically cache (in the \fIisopcb\fR)
+the header of each packet it sends. This cached copy of the header
+is used on subsequent sends reducing the amount of time spent generating
+the header. Therefore, the first action \fIclnp_output()\fR takes is to
+examine the cached header (if any). If the header is still valid (see below)
+then it is used. Otherwise, a new header is built.
+.sh 3 "When The Cached Header Is Invalid"
+.pp
+Before any resources are allocated, the options to be sent with the packet
+are examined. If any unsupported options are present, the error \fIEINVAL\fR
+is returned.
+Next, the length of the source and destination
+NSAP addresses (taken from the \fIisopcb\fR)
+are checked. The source address length may be zero. This
+indicates that \fIclnp_output()\fR should compute the source address based upon
+the route taken, in which case CLNP calls
+the function \fIclnp_srcroute()\fR.
+Source routing
+will be discussed in detail later in this section.
+If, in the process of checking
+the address lengths, an invalid length is detected, the error
+\fIENAMETOOLONG\fR is returned.
+.pp
+After checking the lengths of the addresses,
+CLNP allocates an \fImbuf\fR in which the DT NPDU header will be constructed.
+If an \fImbuf\fR cannot be found, the error
+\fIENOBUFS\fR is returned. Once the \fImbuf\fR is allocated,
+the fixed part of the DT NPDU header is copied into the \fImbuf\fR.
+.pp
+The next step is to route the DT NPDU. This is accomplished by the
+\fIclnp_route()\fR function.
+It is necessary to route the datagram early in the output process because
+in many cases, the source address will not be known until the route
+has been created.
+When a system is multi-homed it has several source addresses.
+The source address to choose depends on the
+network interface (thus, the route) used.
+.pp
+The address part of the DT NPDU follows the fixed part.
+Since appending the address part is the next task,
+the source address must be determined.
+Therefore the route must be determined.
+.pp
+After appending the address part to the fixed part of the
+NPDU header, CLNP
+appends any options given in the arguments to
+\fIclnp_output()\fR.
+The options are specified in a
+separate \fImbuf\fR stored in the \fIisopcb\fR.
+If this \fImbuf\fR
+pointer is not null, a copy of the \fImbuf\fR is made, and this copy is
+chained (appended) to the
+\fImbuf\fR in which the
+NPDU header resides. The options \fImbuf\fR linked in with the DT packet
+must be a copy of the options \fImbuf\fR passed to \fIclnp_output()\fR. If
+this was not done, then
+the options \fImbuf\fR passed would be freed by the interface
+driver after the NPDU had been transmitted.
+Since a copy must be made, it is possible for \fIclnp_output()\fR to
+return \fIENOBUFS\fR at this time.
+A later section of this chapter describes
+the handling of options in greater detail.
+.pp
+User data for the packet are passed to
+\fIclnp_output()\fR as an \fImbuf\fR chain.
+This \fImbuf\fR chain is appended to the DT NPDU header chain.
+At this point, the DT NPDU is ready for transmission.
+If header caching has not been disabled, a cache entry is made in the
+\fIisopcb\fR.
+If the size of the entire packet
+is less than the maximum transmission unit (MTU) of the
+network interface to be used,
+the packet is placed on the queue for that network interface,
+otherwise \fIclnp_fragment()\fR is invoked to
+break up the packet into smaller packets, called
+"derived NPDUs", and transmit the derived NPDUs.
+.sh 3 "When A Cached Header Exists"
+.pp
+In this case, \fIclnp_output()\fR updates the segmentation part of the
+header (if segmenting is permitted), computes the checksum, and transmits
+(or fragments) the packet.
+.pp
+The cached CLNP header is stored in the \fIstruct isopcb\fR. The field
+\fIisop_clnpcache\fR within the \fIisopcb\fR points to an \fImbuf\fR
+which contains a \fIstruct clnp_cache\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct clnp_cache {
+.T&
+l l l l.
++u_short+cni_securep;+/* ptr to security option */
++struct iso_addr+clc_dst;+/* destination of packet */
++struct mbuf+*clc_options;+/* ptr to options mbuf */
++int+clc_flags;+/* flags passed to clnp_output */
++int+clc_segoff;+/* offset of seg part of header */
++struct sockaddr+*clc_firsthop;+/* first hop of packet */
++struct ifnet+*clc_ifp;+/* ptr to interface */
++struct mbuf+*clc_hdr;+/* cached pkt hdr (finally)! */
+};
+.TE
+\fR
+.)b
+The first three fields \fIclc_dst, clc_options\fR and \fIclc_flags\fR
+are used to check the validity of the cache entry. The cache is considered
+valid if:
+.ip \(bu 5
+The options mbuf has not changed.
+.ip \(bu 5
+The destination of the packet has not changed.
+.ip \(bu 5
+The route still exists and is up.
+.ip \(bu 5
+The flags have not changed.
+.pp
+If all these conditions are met, then the bulk of the \fIclnp_output()\fR
+processing is avoided. The fields \fIclc_segoff, clc_firsthop,\fR
+and \fIclc_ifp\fR are used by \fIclnp_output()\fR to transmit the packet.
+The field \fIclc_hdr\fR contains the actual cached header which is copied
+and then enqueued on the outgoing interface.
+.sh 2 "NPDU Input"
+.pp
+.\" FIGURE
+.so figs/clnp_input.nr
+All CLNP NPDUs are processed by \fIclnp_input()\fR.
+.CF
+outlines
+the flow of control within \fIclnlintr()\fR and \fIclnp_input()\fR.
+The solid lines
+indicate normal flow of control. The dashed lines indicate
+possible error returns.
+.pp
+\fIClnlintr()\fR is invoked by a software interrupt.
+This interrupt is posted by a device driver whenever a
+packet is placed in CLNL's input queue
+\fIclnlintrq()\fR, and the queue is empty.
+It is the responsibility of \fIclnlintr()\fR, when invoked,
+to process all packets present on the input queue.
+Thus, to begin the task of processing a packet, \fIclnlintr()\fR
+removes the next packet from the queue.
+When an error is discovered during processing, the packet is discarded and
+\fIclnlintr()\fR begins afresh.
+.pp
+Once removed, the type of the NPDU is checked. If the NPDU is an
+ES-IS packet, then \fIesis_input()\fR is called. If the NPDU is a CLNP
+packet, then \fIclnp_input()\fR is called. Other packets are silently
+discarded.
+The function \fIclnp_hdr_ck()\fR checks the NPDU for consistency.
+Before checking consistency, \fIclnp_hdr_ck()\fR insures
+that the entire NPDU header is located
+contiguously in a single \fImbuf\fR (\fIm_pullup()\fR\** performs this task).
+.(f
+\** If the NPDU header is larger than \fIMLEN\fR (currently 256), then
+\fIm_pullup()\fR will allocate a cluster \fImbuf\fR.
+.)f
+After "pulling" the header into a single \fImbuf\fR, \fIclnp_hdr_ck()\fR
+checks for the proper CLNP version and protocol identification.
+It also checks that the lifetime field is greater than zero.
+After checking header consistency, the NPDU checksum is computed.\**
+.(f
+\** If the checksum value is zero, the checksum is not computed.
+The value zero is reserved to mean \*(lqdo not use checksum\*(rq.
+.)f
+If the checksum is valid, \fIclnp_data_ck()\fR is called to insure
+that the amount of data in the \fImbuf\fR chain corresponds to the
+amount indicated in the NPDU header.
+.pp
+Once the consistency of the NPDU has been assured, the various parts of the
+packet are extracted.
+Care is taken with each extraction to insure that an attempt is not made
+to address data that does not really exist. (Such an attempt could
+result in a kernel trap).
+.pp
+Next, the options part of the NPDU, if present, is checked for validity.
+If unsupported options are found, the packet is discarded.
+See the section \*(lqNPDU options\*(rq for details of options processing.
+.pp
+Finally, after the preceding checks and extractions have been made, the
+destination address is examined.
+If the address indicates that the packet's destination is not this
+system, the packet is forwarded by calling \fIclnp_forward()\fR.
+See the section \*(lqDT NPDU Forwarding\*(rq for details of packet forwarding.
+If this end system is the
+packet's destination, processing continues.
+.pp
+If the packet is not complete, it is passed to \fIclnp_reass()\fR for
+reassembly.
+See the section \*(lqDT NPDU Reassembly\*(rq
+for details of packet reassembly.
+.pp
+At this point, a complete NPDU is in hand.
+If the NPDU is a DT NPDU, it is given to the transport layer
+by calling the TP input routine.
+Otherwise, it is given to the ER NPDU processing function,
+\fIclnp_er_input()\fR.
+.sh 3 "DT NPDU Forwarding"
+.pp
+Packet forwarding is accomplished by \fIclnp_forward()\fR.
+This is performed regardless of the system's type (end or intermediate).
+The task of
+forwarding a packet is fairly straight-forward. First, the lifetime
+field of the datagram is decremented.
+If this operation changes the value to zero, the packet is discarded.
+.pp
+If the source route option is present, and the address at the top of the list
+matches an address of one of the system's network interfaces, then
+the next-source-route-to-be-used offset is adjusted in the option.
+Next, the packet is routed by \fIclnp_route()\fR
+or \fIclnp_srcroute()\fR.
+If the record route option is present, the address of the outgoing
+network interface is recorded by \fIclnp_dooptions()\fR.
+.pp
+Finally the packet is dispatched.
+If the size of the entire packet is less than the MTU of the output
+network interface, the packet is enqueued for that interface,
+otherwise \fIclnp_fragment()\fR is invoked to
+fragment the packet and enqueue the derived NPDUs.
+.sh 2 "NPDU Options"
+.pp
+The options section of an NPDU consists of a series of triplets:
+\fIoption identification\fR, \fIoption length\fR,
+and \fIoption value\fR.
+These triplets are checked each time the options are examined or changed.
+To avoid repeated parsing of the options, the ARGO CLNP
+maintains an index.
+This index is organized as a \fIclnp_optidx\fR structure.
+This structure is shown below.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct clnp_optidx {
+.T&
+l l l l.
++u_short+cni_securep;+/* ptr to security option */
++char+cni_secure_len;+/* length of security option */
++u_short+cni_srcrt_s;+/* offset of src rt option */
++u_short+cni_srcrt_len;+/* length of src rt option */
++u_short+cni_recrtp;+/* ptr to head of recrt option */
++char+cni_recrt_len;+/* length of recrt option */
++char+cni_priorp;+/* ptr to priority option */
++u_short+cni_qos_formatp;+/* ptr to format of qos option */
++char+cni_qos_len;+/* length of qos option */
++char+cni_er_reason;+/* reason from ER pdu option */
+};
+.TE
+.)b
+This index allows CLNP quickly to discover the existence
+and value of an option.
+For example, if a security option is present, the \fIcni_securep\fR
+field of the option index is non-zero and the value of
+\fIcni_securep\fR is an offset to the beginning of the
+security option.
+The function \fIclnp_opt_sanity()\fR
+parses the options and computes the index.
+While parsing, it also verifies that the
+options are valid and correctly structured.
+If an error occurs while parsing an option,
+\fIclnp_opt_sanity()\fR returns an error code.
+The following sections describe how options are processed
+during the send, forward and receive operations.
+.sh 3 "Sending Options"
+.pp
+Options to be sent with a datagram are passed to \fIclnp_output()\fR as
+two arguments. An option index is passed along with an \fImbuf\fR
+containing the options.
+The options in the \fImbuf\fR must be formatted
+exactly as specified by CLNP.
+If the security, quality of service, or
+priority options are specified, \fIclnp_output()\fR will not transmit the
+datagram and \fIEINVAL\fR is returned.
+The system call \fIsetsockopt()\fR is used to set the CLNP options
+to be sent on a datagram.
+See \fIclnp(4)\fR for more information about setting CLNP options.
+.pp
+If a source route is specified,
+the normal CLNP routing function \fIclnp_route()\fR is not used, and
+\fIclnp_srcroute()\fR is invoked.
+.pp
+When the DECBIT config option is specified, \fIclnp_output()\fR will
+automatically add the globally unique quality of service option to the packet.
+The sequencing preferred and low delay bits in this option are set.
+.sh 3 "Forwarding Options"
+.pp
+During packet forwarding, the padding, security,
+and priority options are ignored. If record route is selected, the
+function \fIclnp_dooptions()\fR logs the current network
+interface address in the record route list.
+.pp
+If a source route is specified,
+the normal CLNP routing function \fIclnp_route()\fR is not used, and
+\fIclnp_srcroute()\fR is invoked.
+.sh 4 "The Congestion Experienced Bit"
+.pp
+If a packet is forwarded containing the globally unique quality of
+service option, and the interface through which the packet will be
+transmitted has a queue length greater than \fIcongest_threshold\fR,
+then the congestion experienced bit is set in the quality of service option.
+.pp
+The threshold value stored in \fIcongest_threshold\fR may be changed
+with the \fIclnlutil\fR utility.
+.sh 3 "Receiving Options"
+.pp
+On receipt, all CLNP options are ignored except the security
+and globally unique quality of service option.
+If the security option is found, the packet is discarded.
+If the globally unique quality of service option is present, and the
+congestion experienced bit is set, then the transport congestion
+control function \fItpclnp_ctlinput(PRC_QUENCH2, addr)\fR is called.
+The following table summarizes the CLNP option processing.
+.(b
+.TS
+allbox, tab(+);
+l l l l.
+Option+Send+Forward+Receive
+=
+Padding+may be set+-+-
+Security+reject+ignore+discard
+Source Route+\fIclnp_srcroute()\fR+\fIclnp_srcroute()\fR+-
+Record Route+-+\fIclnp_dooptions()\fR+-
+QOS+added+congestion bit set+tpclnp_ctlinput()
+Priority+reject+ignore+-
+.TE
+.)b
+.sh 2 "DT NPDU Segmentation"
+.pp
+Segmentation is the process by which initial NPDUs are segmented into
+smaller derived NPDUs when the initial NPDU is too large for transmission
+on a network interface.
+Segmentation is accomplished by \fIclnp_fragment()\fR.
+This function chops the NPDU into pieces and individually places the pieces
+in the appropriate network interface's output queue.
+Each piece is made as large as possible.
+Note: The phrase "fragmentation" is used synonymously with "segmentation"
+throughout this prose and the CLNP fragmentation code. This is due to
+this author's familiarity with the DoD Internet Protocol which uses
+the term "fragment."
+.sh 2 "DT NPDU Reassembly"
+.pp
+Derived NPDUs are put back together by the process called
+reassembly.
+Reassembly is performed only at the destination end system.
+When a derived NPDU arrives, it is passed to \fIclnp_reass()\fR.
+This function scans a linked list of NPDUs awaiting reassembly.
+Each packet in the list is represented by a fragment list
+descriptor, which is stored in an \fImbuf\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct clnp_fragl {
+.T&
+l l l l.
++struct iso_addr+cfl_src;+/* source */
++struct iso_addr+cfl_dst;+/* destination */
++u_short+cfl_id;+/* id of the pkt */
++u_char+cfl_ttl;+/* time to live */
++u_short+cfl_last;+/* offset of last
++++byte of packet */
++struct mbuf +*cfl_orighdr;+/* ptr to
++++original header */
++struct clnp_frag+*cfl_frags;+/* linked list
++++of fragments */
++struct clnp_fragl+*cfl_next;+/* next pkt be-
++++ing reassembled */
+};
+.TE
+\fR
+.)b
+The fields \fIcfl_src\fR, \fIcfl_dst\fR, and \fIcfl_id\fR are used to
+match an incoming derived NPDU with a fragment list.
+\fICfl_orighdr\fR contains a copy of the NPDU header of the first fragment received.
+The linked list of fragments pertaining to the packet is stored in the
+\fIcfl_frags\fR field.
+Each NPDU fragment is represented by a \fIclnp_frag\fR structure,
+stored in an \fImbuf\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct clnp_frag {
+.T&
+l l l l.
++u_int+cfr_first;+/* offset of
++++first byte of this frag */
++u_int+cfr_last;+/* offset of last
++++byte of this frag */
++u_int+cfr_bytes;+/* bytes to shave */
++struct mbuf+*cfr_data;+/* ptr to data */
++struct clnp_frag+*cfr_next;+/* next frag */
+};
+.TE
+\fR
+.)b
+The fields \fIcfr_first\fR and \fIcfr_last\fR indicate the first and
+last octet of the fragment.
+\fICfr_data\fR points to an mbuf chain
+which contains the data for the fragment.
+.pp
+If \fIclnp_reass()\fR finds a \fIclnp_fragl\fR structure matching the
+incoming derived NPDU, \fIclnp_insert_frag()\fR is called to create
+a \fIclnp_frag\fR structure and insert it in the linked list of
+packet fragments.
+If no \fIclnp_fragl\fR structure is found,
+\fIclnp_newpkt()\fR is invoked to create a new fragment list structure.
+.pp
+The last task \fIclnp_reass()\fR performs is to check if the fragment
+that just arrived completes the reassembly of the initial NPDU.
+If it does, the reassembled NPDU is rearranged to
+look like it just arrived intact.
+It accomplishes this by linking the \fImbuf\fRs holding
+the fragments into one \fImbuf\fR chain that represents the initial
+NPDU.
+A pointer to this \fImbuf\fR chain is returned by \fIclnp_reass()\fR.
+.pp
+If the newly arrived fragment does not complete an initial NPDU,
+\fIclnp_reass()\fR returns NULL.
+.sh 3 "Reassembly Lifetime Control"
+.pp
+One function of the CLNP is to prevent
+a proliferation of fragments awaiting reassembly from
+consuming buffers in an end system for indefinite periods of time.
+This function is called reassembly lifetime control.
+It is accomplished by
+periodic traversal of
+the list of \fIclnp_fragl\fR structures, decrementing the
+\fIcfl_ttl\fR field.
+This field is a copy of the NPDU time-to-live
+field. If \fIcfl_ttl\fR reaches zero, all resources associated with the
+fragment are released.
+The procedure
+\fIclnp_slowtimo()\fR, which is called by the system
+clock every 500 milliseconds (every half-second),
+performs the CLNP reassembly lifetime control.
+.sh 2 "ER NPDU"
+.pp
+An ER NPDU is sent to the originator of a packet when a DT NPDU is
+discarded and the error report function is not suppressed. Suppression
+of the error report function is accomplished by setting the "no ER"
+bit in the CLNP header.
+A packet is discarded by \fIclnp_discard()\fR.
+Before it
+returns the \fImbufs\fR used to store the
+discarded packet to the \fImbuf\fR free list,
+\fIclnp_discard()\fR
+determines if the error report function is suppressed.
+If not,
+an ER NPDU will be sent to the originator of the discarded packet by
+calling \fIclnp_emit_er()\fR.
+.pp
+\fIClnp_emit_er()\fR will create an ER NPDU, address it to the
+originator of the discarded packet, route the NPDU,
+and transmit it, sending the header of the discarded NPDU as data.
+ER NPDUs may not be segmented.
+If the ER NPDU is too large for the outgoing network interface,
+the packet is truncated.
+.sh 2 "Raw CLNP"
+.pp
+In order to test CLNP in isolation from higher layer
+protocols, ARGO provides a \*(lqraw\*(rq interface to CLNP.
+This raw interface is selected with the \fISOCK_RAW\fR parameter to
+the
+\fIsocket()\fR
+system call.
+When a \*(lqraw\*(rq socket is open,
+and CLNP receives an NPDU,
+CLNP must determine whether the incoming NPDU is destined for
+the
+\*(lqraw\*(rq interface or for the interface to the
+OSI transport protocol entity.
+ARGO addresses this problem by using non-standard NPDU types
+for packets sent on \*(lqraw\*(rq sockets.
+The type field in the CLNP NPDU header
+is set to \fICLNP_RAW\fR (hex 1d) rather than \fICLNP_DT\fR
+in NPDUs that originate from
+\*(lqraw\*(rq sockets.
+This non-standard type value is used by \fIclnp_input()\fR
+to decide which upper layer protocol should receive the packet.
+See \fIclnptest(8)\fR for more information about the
+\*(lqraw\*(rq CLNP interface.
+.sh 2 "CLNP Echo"
+.pp
+In the DoD world, ICMP supports an \fIecho\fR service.
+This allows one to \*(lqping\*(rq a distant gateway and
+to receive an echo response (a packet in return) if the gateway is working.
+There is no counterpart to \*(lqecho\*(rq in ISO 8473 (CLNP).
+ARGO provides this non-standard feature in its connectionless
+network layer.
+.pp
+Like raw CLNP, implementing an echo function requires a non-standard
+NPDU type value to allow
+\fIclnp_input()\fR to differentiate between a DT NPDU to be forwarded
+or passed to a higher layer protocol, and an NPDU that is to be echoed.
+When requesting an echo,
+the CLNP type field is set to \fICLNP_EC\fR (hex 1E) rather
+than CLNP_DT.
+When \fIclnp_input()\fR receives a packet with type
+\fICLNP_EC\fR,
+it swaps the source and destination addresses, sets the
+type field to \fICLNP_ECR\fR (hex 1F) and forwards
+the packet back to the sender.
+See also \fIclnpping(8)\fR.
+.sh 2 "Timers"
+.pp
+The only timer used by CLNP is the
+500 millisecond timer, which is
+used for reassembly lifetime control.
+See the section \*(lqReassembly Lifetime Control.\*(rq
+.sh 1 "End System to Intermediate System Routing Protocol (ES-IS)"
+.\" ROB
+.sh 2 "Overview"
+.pp
+This section describes the implementation of the ES-IS routing protocol.
+This protocol is used primarily to resolve NSAP address to SNPA address
+translations. It is also used to identify end systems
+and intermediate systems on
+the local subnetwork.
+All of this work is accomplished by transmitting
+packets of the type End System Hello (ESH), Intermediate System Hello (ISH)
+and Request Redirect (RD).
+.pp
+For the purpose of this section, the following definitions of end system (ES)
+and intermediate system (IS) apply.
+.ip \(bu 5
+An \fIend system\fR is an open system that
+is an OSI end system in the standard OSI sense
+(that it supports a full OSI protocol suite in addition to the network layer)
+and that
+implements the functions of the
+ES-IS protocol that are mandatory for end systems,
+such as the Query Configuration function and the Record Redirect
+function,
+but that does not implement
+the functions of the ES-IS protocol that are for intermediate systems.
+.ip \(bu 5
+An \fIintermediate system\fR is an open system that
+is an OSI intermediate system in the standard OSI sense
+(that it performs packet routing in the network layer)
+and that
+implements the functions of the
+ES-IS protocol that are mandatory for intermediate systems,
+such as the Request Redirect function,
+but not the functions of the ES-IS protocol that are for end systems.
+.pp
+While a system may be an ES or an IS or both according to the
+standard OSI definitions, this is not the case in the context of
+the ES-IS protocol.
+.pp
+An ARGO system is by default an end system, by the definitions given above.
+An ARGO system can be made to function as an intermediate system
+instead of an end system with the \fIclnlutil\fR program.
+See \fIclnlutil(8)\fR for more information.
+.sh 2 "Report Configuration Function"
+.pp
+The report configuration function is used by end systems and intermediate
+systems to inform each other of their reachability and current subnetwork
+addresses.
+This function is invoked whenever the configuration timer
+expires.
+This timer fires at a frequency of once every
+\fIesis_config_time\fR seconds.
+By default, this value is 60 (seconds),
+but it may be changed with the \fIclnlutil\fR program.
+.pp
+The report configuration function is contained in the C function
+\fIesis_config()\fR. Called every \fIesis_config_time\fR seconds,
+\fIesis_config()\fR searches the list of active network interfaces
+calling \fIesis_shoutput\fR for each interface that is up, has
+broadcast ability and has an ISO address configured.
+.pp
+The function \fIesis_shoutput()\fR has the responsibility of building and
+transmitting ESH and ISH packets.
+It takes several arguments, including a pointer to a network interface
+and
+a packet type (ESH or ISH).
+If the packet type is ESH, then
+each NSAP address configured on the specified interface is added to
+the ESH NPDU. ISH NPDUs may only contain a single NSAP address\**.
+.(f
+\** Actually, ISH packets contain Network Entity Titles (NETs). ARGO
+does not make a distinction between NETs and NSAPs.
+.)f
+After the packet is built, it is transmitted on the subnetwork. ESH packets
+are sent to the multicast address \fIall intermediate systems\fR, whereas
+ISH packets are sent to the multicast address \fIall end systems\fR.
+.pp
+Each ISH and ESH NPDU contains
+a holding timer setting. This setting (specified
+in seconds) is used by the receiver of the NPDU to set its
+holding timer. When its holding timer expires, the information from
+the NPDU is erased. The holding timer value sent on each ISH and ESH NPDU
+is contained in the variable \fIesis_holding_time\fR. By default, this
+timer setting is 120 seconds. This value may be changed with the
+\fIclnlutil\fR utility program.
+.sh 2 "Record Configuration Function"
+.pp
+The Record Configuration function receives ESH or ISH NPDUs, extracts the
+configuration information, and updates kernel-resident tables.
+The two functions \fIesis_eshinput()\fR and \fIesis_ishinput()\fR
+process incoming ESH and ISH NPDUs, respectively.
+.pp
+The ES-IS entity maintains a table that
+associates SNPA-addresses with NSAP-addresses.
+This table is called the \fISNPA cache\fR.
+.pp
+Whenever an ESH or ISH NPDU is received,
+an entry is made in the SNPA cache
+via the \fIsnpac_add()\fR function.
+This entry is kept in the cache until the holding timer expires.
+In addition to adding an entry to the SNPA cache, \fIsnpac_add()\fR creates
+a default ISO route toward the sender of the ISH.
+One such route is kept so that the ES-IS entity has at most one
+route to an IS at any time.
+Note that ISHs from different sources will
+cause the route to the source of the earlier ISH to be
+overwritten.
+The default route
+will be removed when the ISH holding timer expires.
+.pp
+If, at the time an ESH or ISH NPDU is received, the SNPA cache
+contains no entry for the NSAP address in the NPDU just received,
+an ESH or ISH (depending on the system type) NPDU is
+transmitted to the sender of the NPDU just received.
+.sh 2 "Resolving NSAP addresses to SNPA addresses: Query Configuration Function"
+.pp
+Whenever a device driver needs to resolve an NSAP address to
+an SNPA address, it calls \fIiso_snparesolve()\fR. This function first looks
+up the NSAP address in the SNPA cache. If a match is found, the
+corresponding SNPA address is returned. If a match is not found and the
+system is an end system, and there is a known intermediate system, then
+the SNPA address of the intermediate system is returned. It is assumed that
+the intermediate system will forward the packet and transmit a redirect back
+(see "Redirection Generation", below).
+If a match is not found and the system is an end system, but there is no
+known intermediate system, then \fIiso_snparesolve()\fR will return
+the multicast address \fIall end systems\fR.
+In all other cases, \fIiso_snparesolve()\fR will return an error.
+This is known as the query configuration function.
+.sh 3 "Configuration Response Function"
+.pp
+In order for the query configuration function to be effective, the network
+entity that receives a CLNP DT sent to the \fIall end systems\fR
+multicast address must transmit an ESH back to the sender of the DT.
+This is called the configuration response function and is accomplished by
+calling \fIesis_shoutput()\fR from within \fIclnp_input()\fR.
+.sh 2 "Redirection Generation"
+.pp
+When an intermediate system forwards a packet onto the same interface
+upon which
+the packet arrived, a redirect (RD) NPDU is generated. This NPDU is
+transmitted by calling \fIesis_rdoutput()\fR from within \fIclnp_forward()\fR.
+Note that end systems may forward packets but they do not generate RD PDUs.
+.sh 2 "Redirection Receipt"
+.pp
+RD NPDUs direct an end system to create an SNPA cache entry
+for an NSAP address, or, if such an entry exists, to change
+the SNPA address associated with the NSAP address.
+The receipt of RD NPDUs is handled by \fIesis_rdinput()\fR.
+This function
+parses the RD NPDU and adds an entry to the SNPA cache for the corresponding
+destination NSAP address.
+If the redirect is toward an intermediate system,
+meaning that the RD NPDU contains an SNPA address
+of an intermediate system (gateway),
+a route is created for the destination NSAP with the intermediate system as
+the first hop, or gateway, in the route.
+.sh 2 "Multicast Addresses"
+.pp
+As specified by the December 1987 NBS agreements, the address
+\fIall end systems\fR is {0x09, 0x00, 0x2B, 0x00, 0x00, 0x04} and the address
+\fIall intermediate systems\fR is {0x09, 0x00, 0x2B, 0x00, 0x00, 0x05}.
+These multicast addresses are only used on the 802.3 subnetwork (baseband).
+Broadcast addresses are used on the 802.5 subnetwork (token ring). See
+the comment in \fC/sys/netargo/iso_snpac.c\fR for more information on
+multicast addresses.
+.sh 1 "Connection Oriented Network Service and Subnetwork Service"
+.pp
+The following sections describe the design of the Connection Oriented
+Network Service (CONS) and the Connection Oriented Subnetwork Service
+(COSNS).
+The CONS and COSNS are provided by two functionally separate but related
+modules, a connection manager and the ISO 8208 (X.25) protocols.
+The connection manager is also known in OSI terminology as a
+subnetwork dependent convergence function, or SNDCF.
+In ARGO it is used for more than an SNDCF, and it is a sort of
+"glue" that binds a transport service, a network service, a
+subnetwork service, and a device driver together, so
+hereinafter it is called "the glue".
+This code performs some of the functions of ISO 8878,
+which specifies how ISO 8208 (X.25) can be used to provide the OSI
+connection oriented network service.
+The X.25 protocols are implemented in a coprocessor
+made by Eicon Technology, Inc.
+The device driver \fBecn\fR is the Unix kernel interface to this
+coprocessor.
+The sections that follow describe the glue and the \fBecn\fR device
+driver.
+.sh 2 "The Glue"
+.pp
+The glue provides
+services to several modules in the kernel:
+.ip "Subnetwork service" 5
+is provided to other network layer protocols, such as CLNP (ISO 8473).
+The ARGO CLNP uses this service.
+The Internet IP could be made to use this service with
+minimal effort, because this service interface is made to look
+like a standard Unix BSD link layer service (it has
+a device driver interface).
+.ip "Network service" 5
+is provided to transport layer protocols, such as TP (ISO 8073).
+This service interface looks like a standard Unix BSD
+network service (a procedure call interface).
+.ip "Transport service" 5
+could be provided to the socket module.
+While this is not provided with the ARGO software, the glue
+is designed to permit
+such a service to be provided with little additional programming effort.
+.pp
+Higher layer protocols
+that use a connection-oriented
+network or subnetwork service need to manage virtual
+circuits in a similar fashion.
+Rather than put connection management functions into each higher
+layer protocol (HLP) entity
+that uses the CONS or COSNS,
+in ARGO the connection management is in one module, the glue.
+Other alternatives exist, for example in the OSI world,
+one may place in the TP entity the function of connection management for TP,
+and implement a network connection management subprotocol
+of the transport layer (ISO 8073 DAD1, NCMS).
+In addition, connection management for CLNP may be implemented as part of
+the CLNP entity.
+A subnetwork dependent convergence protocol (ISO 8878/A) may
+be implemented to support connection management for CLNP.
+The approach taken in ARGO is different from those suggested in ISO
+for two reasons.
+First, ARGO aims to minimize the amount of code written to perform a given
+task.
+Second, ARGO has several coexisting paths through the network layer,
+which the ISO approach does not address.
+For example, in both ISO 8878/A and in NCMS it is assumed that if
+an incoming call arrives from NSAP \(*b
+while a call to NSAP \(*b is being placed,
+the two calls are resolved to one virtual circuit.
+This is not feasible in the ARGO scenario, since it may not be known
+until after
+the calls are established and higher level packets are exchanged
+whether the two calls are to be used
+for the same path and for the same higher layer protocols.
+A possible alternative approach is to use an NSAP-address for each path
+through the network layer
+(or protocol suite).
+This was rejected in the ARGO design because it puts the burden
+on the calling application entity or network entity to
+determine the proper NSAP-address to use to determine the protocol
+suite to be used to reach the destination end system.
+For this reason, none of the approaches suggested in ISO is adopted
+here.
+.pp
+The glue provided in the ARGO
+kernel does not provide the full OSI network service.
+It provides that subset of the network service that is used
+by ARGO TP and by ARGO CLNP.
+The OSI connection-oriented network service elements that are
+provided are described in Chapter Four,
+in the section titled "Connection Oriented Network Service".
+.pp
+Each module using the glue has its own service
+interface to the glue.
+.\" When X.25 is used as a
+.\"transport service, the standard protocol switch table is used, and the procedure
+.\"\fIcons_usrreq()\fR is the protosw entry for a
+.\"service in the iso protosw table that provides the
+.\"SOCK_STREAM abstraction in the AF_ISO address family,
+.\"with protocol ISOPROTO_X25.
+.\"This service is called XTS in the glue code and hereafter
+.\"in this document.
+.\".pp
+When the transport layer uses the glue as a network service,
+the interface is the procedure
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+error = cons_output( isop, m, len, isdgm )
+.T&
+l l l.
+ +struct isopcb +*isop;
+ +struct mbuf +*m;
+ +int+error, len, isdgm;
+.TE
+\fR
+.)b
+.pp
+When the network layer uses the glue as a subnetwork service
+the interface is the device driver-like procedure
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+error = cosns_output( ifp, m, dst )
+.T&
+l l l.
+ +struct ifnet +*ifp;
+ +struct mbuf +*m;
+ +struct sockaddr_iso +*dst;
+ +int+error;
+.TE
+\fR
+.)b
+.pp
+When the glue is used as a connection-oriented service
+(i.e., by TP 0, and by TP 4 during the transport
+connection establishment phase, during which
+it is not yet known whether class 0 or class 4 will be used)
+the following procedures are used:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+error = cons_openvc( copcb, dstaddr, so )
+.T&
+l l l.
+ +struct cons_pcb +*copcb;
+ +struct sockaddr_iso +*dstaddr;
+ +struct socket+*so;
+.T&
+l s s s.
+ +++
+error = cons_netcmd( cmd, isop, channel, isdgm )
+.T&
+l l l.
+ +int+cmd;
+ +struct isopcb +*isop;
+ +int+channel, isdgm;
+.TE
+\fR
+.)b
+.pp
+The procedure \fIcons_openvc()\fR places a call.
+The procedure \fIcons_netcmd()\fR accepts, rejects, or clears
+a call.
+There is no incoming call indication, because
+the glue uses the passive open model for accepting calls.
+The HLP simply sees a new incoming packet, and is given
+a virtual circuit number (channel) along with the incoming packet.
+If the HLP chooses to reject the call
+it may do so, which will cause the virtual circuit (VC) to be cleared.
+.pp
+The glue may reject (clear) an incoming call for its own reasons.
+The following table lists the reasons that the glue may
+clear a call and the ISO 8208 diagnostic code used on the X.25 clear packet
+in each case.
+For a complete list of the permissible diagnostic codes, see
+Figure 14-B of ISO 8208.
+.in -5
+.(b
+.TS
+center expand box tab(+);
+l l.
+Reason+Diagnostic code
+=
+The VC was opened for use with CLNP +Higher level initiated reset
+or TP 4 and has been idle for the +user resynchronization
+maximum inactivity time. +(0xfa)
+_
+The HLP closed +Higher level initiated disconnection
+this network connection. +- normal (0xf1)
+_
+The HLP rejected +Higher level initiated connection
+this network connection. +rejection - transient condition (0xf4)
+_
+The X.25 call packet contained +Higher level initiated connection
+facilities that are not supported +rejection - incompatible
+by the glue, or did not contain +information in user data (0xf8)
+necessary information, e.g. calling +
+or called DTE address. +
+_
+The X.25 call packet contained +Higher level initiated connection
+call user data that does not +rejection - unrecognizable protocol
+indicate any HLP supported by ARGO +identifier in user data
+ +(0xf9)
+_
+The given destination +OSI Network service problem: NSAP
+NSAP-address is not supported +address unknown (permanent
+ +condition) (0xeb)
+_
+The X.25 packet or a facility +Packet not allowed-
+therein was too long +packet too long. (0x27)
+.TE
+.)b
+.in +5
+.pp
+The glue provides several functions common to all
+modules (HLPs) that use the glue.
+Regardless of the HLP,
+the DTE addresses and NSAP addresses are associated in the same
+manner.
+The same network layer protocol identification scheme
+(ISO PDTR 9577) is used for all HLPs.
+Several different HLPs need to close inactive X.25
+virtual circuits after a timer expires.
+The glue insulates the
+device driver interface to the X.25 coprocessor
+from the HLP.
+.pp
+TP class 0 connections
+.\" and the X.25 "transport service"
+do not share X.25 VCs
+.\" with each other or among transport service-level circuits (sockets),
+so
+.\" these two modules need to keep X.25
+the glue needs to maintain
+a 1-1 correspondence between VCs
+and sockets.
+.\" For use by TP 0 and XTS,
+For use by TP 0,
+one network-level pcb is needed for each socket, and that is a
+\fIcons_pcb\fR, described below.
+.pp
+TP class 4 connections may share VCs,
+and TP 4 makes no correspondence between sockets and VCs.
+CLNP regards VCs similarly to TP 4.
+A given VC may be used simultaneously for many higher level connections,
+but all higher level connections using a given VC must use the same
+path or protocol suite.
+In other words, a TP4 connection running over CONS may not share a
+VC with a TP4 connection running over CLNS/COSNS.
+.pp
+To manage VCs and to maintain the separation of sharable and non-sharable
+VCs, the glue uses the following protocol control block:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct cons_pcb {
+.T&
+l l l.
+ +struct isopcb+_co_isopcb;
++u_short+co_state;
++u_char+co_flags;
++u_short+co_ttl;
++u_short+co_init_ttl;
++int+co_channel;
++struct ifnet+*co_ifp;
++struct protosw+*co_proto;
++struct dte_addr+co_peer_dte;
++struct ifqueue+co_pending;
+};
+.T&
+l l s.
+#define co_next+_co_isopcb.isop_next
+#define co_prev+_co_isopcb.isop_prev
+#define co_head+_co_isopcb.isop_head
+#define co_laddr+_co_isopcb.isop_laddr
+#define co_faddr+_co_isopcb.isop_faddr
+#define co_lport+_co_isopcb.isop_laddr.siso_tsuffix
+#define co_fport+_co_isopcb.isop_faddr.siso_tsuffix
+#define co_route+_co_isopcb.isop_route
+#define co_socket+_co_isopcb.isop_socket
+}+
+.TE
+\fR
+.)b
+.pp
+The \fIcons_pcb\fR contains
+an \fIisopcb\fR so that TP 0
+.\" and XTS
+may use the routines that manipulate \fIisopcb\fR structures for allocating
+and
+deallocating PCBs, binding addresses to PCBs,
+and finding routes.
+.pp
+A CONS PCB has states CLOSED, LISTENING, CLOSING,
+CONNECTING, ACKWAIT, and OPEN.
+This represents the state of the VC to the degree necessary to the glue.
+The glue uses the passive open model for opening VCs.
+The coprocessor device driver always accepts
+incoming calls and passes an indication to the glue when
+a call is accepted by the coprocessor.
+If the user of the glue (the HLP) or the glue itself decides
+that the VC is not desired, the VC is cleared.
+.pp
+The \fIcons_pcb\fR contains a bit mask, \fIco_flags\fR, with values:
+.(b
+\fC
+.TS
+tab(+);
+l l l l.
+#define+CONSF_OCRE+0x40+/* created on OUTPUT */
+#define+CONSF_ICRE+0x20+/* created on INPUT */
+#define+CONSF_DGM+0x04+/* for datagram use only */
+.TE
+\fR
+.)b
+.pp
+The flag
+CONSF_DGM means that the VC is being used to provide a
+datagram (connectionless, unreliable, unsequenced)
+service to the higher layer, and that requests for additional VCs
+from the same higher layer entity
+may be served by this VC, effectively
+multiplexing higher layer connections on this VC.
+When this flag is set in a \fIcons_pcb\fR, there is no associated
+\fIco_socket\fR pointer.
+When CONSF_DGM is not set, there is an associated
+\fIco_socket\fR pointer, and the VC is being used for
+TP 0.
+.pp
+The flag
+CONSF_ICRE means that the VC was created by
+an incoming call indication.
+The flag
+CONSF_OCRE means that the VC was created
+on behalf of an outgoing call request.
+.pp
+The \fIstruct dte_addr\fR field, \fIco_peer_dte\fR,
+contains the peer's DTE address.
+The glue locates VCs by searching the list of protocol control
+blocks for a PCB with a DTE matching that desired.
+.pp
+The glue is given an NSAP-address by the HLP entity.
+The glue finds the desired DTE address by searching the
+ES-IS SNPA cache for an SNPA-address (DTE address) associated
+with the NSAP-address given by the HLP entity.
+This means that to use the CONS, an entry for each desired
+peer must appear in the SNPA cache.
+ARGO does not provide the ES-IS protocol for use with ISO 8208, so
+"permanent" or static entries must be placed in this cache by hand,
+using the utility program \fIclnlutil\fR.
+.pp
+When an incoming call is accepted, the peer's DTE address is
+placed in the SNPA cache along with
+an NSAP address generated as follows:
+.np
+If the incoming call contained the peer's NSAP-address
+in an Address Extension Facility (AEF, available with 1984 X.25),
+this NSAP-address is used, otherwise
+.np
+the glue creates a "type-37" address (the format defined by AFI 37
+in ISO 8348/AD 2).
+.pp
+TP 4 can have its outgoing packets sent on more than one VC.
+The glue presently contains no mechanism for fanning outgoing
+packets onto several VCs; however,
+it does not prohibit packets from arriving for TP 4 on any VC that
+was opened with the protocol identifier for TP.
+.pp
+The glue has the ability to generate AEFs on outgoing calls, but
+this ability is turned off,
+since the public data network on which ARGO runs at Wisconsin
+does not support 1984 X.25, and so it rejects packets containing
+AEFs.
+The use of AEFs can be reinstated by making a kernel with the
+option \fBX25_1984\fR or by adding the line
+.nf
+.in +5
+\fC
+#define X25_1984
+\fR
+.in -5
+.fi
+at the top of the file
+\fC/sys/netargo/if_cons.c\fR
+and rebuilding the kernel.
diff --git a/share/doc/iso/wisc/net_serv.nr b/share/doc/iso/wisc/net_serv.nr
new file mode 100644
index 0000000..4608f01
--- /dev/null
+++ b/share/doc/iso/wisc/net_serv.nr
@@ -0,0 +1,163 @@
+.NC "Network Service Interface"
+.sh 1 "Connectionless Network Service"
+.pp
+This section describes the interface to the ISO connectionless network service.
+There are really two interfaces to the CLNS: the internal interface
+and the IPC interface.
+The internal interface is based on
+procedure calls. It is used only within the kernel. The IPC interface
+allows a user process to access the CLNS directly. This is used only
+for testing and debugging purposes.
+.sh 2 "Primitives"
+.pp
+The CLNS is, by definition, connectionless. Therefore, there are no
+primitives associated with connection establishment or connection release.
+There is one primitive associated with data transfer: N-UNITDATA.
+The parameters to a N-UNITDATA request are: source NSAP address,
+destination NSAP address, quality of service, and user data.
+The parameters of a N-UNITDATA indication are identical to those of the
+request.
+In this implementation, the quality of service parameter is not supported.
+.sh 2 "Internal Interface"
+.pp
+Within the kernel, an N-UNITDATA request is effected by the procedure
+\fIclnp_output()\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+clnp_output(m0, isop, datalen, flags)
+.T&
+l l l.
+ +struct mbuf+*m0;+/* data */
+ +struct isopcb+*isop;+/* ISO protocol control block */
+ +int+datalen;+
+ +int+flags;+/* flags */
+.TE
+\fR
+.)b
+This procedure will construct a DT NPDU, route it, and transmit it on
+the appropriate subnetwork. \fIM0\fR is an mbuf chain containing the
+user data portion of the N-UNITDATA request. \fIIsopcb\fR is the iso protocol
+control block previously allocated. \fIClnp_output\fR will use the following
+fields: \fIisop_laddr\fR, \fIisop_faddr\fR, \fIisop_route\fR, \fIisop_options\fR,
+\fIisop_optindex\fR, and \fIisop_clnpcache\fR.
+\fIDatalen\fR specifies the number of bytes of user data.
+The \fIflags\fR parameter will be discussed in a subsequent chapter.
+.pp
+A N-UNITDATA indication occurs when a DT NPDU arrives. The indication is
+generated by calling the appropriate upper layer protocol input routine.
+In the case of TP, the procedure \fItpclnp_input()\fR is called:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+tpclnp_input(m, src, dst, len)
+.T&
+l l l.
+ +struct mbuf+*m;+/* DT NPDU */
+ +struct iso_addr+*src;+/* source of packet */
+ +struct iso_addr+*dst;+/* destination of packet */
+ +int+len;+/* length of clnp header */
+.TE
+\fR
+.)b
+\fIM\fR contains the entire DT NPDU packet. \fILen\fR indicates the size
+of the CLNP header. In other words, the user data of the DT NPDU begins
+\fIlen\fR bytes into \fIm\fR. \fISrc\fR and \fIdst\fR indicate the
+source and destination NSAP addresses of the packet.
+.sh 3 "CLNP/Subnetwork Interface"
+.pp
+The design of the interface between the subnetwork and the CLNP is
+determined by the design of the Unix network interface drivers. CLNP
+follows the conventional mechanisms for exchanging packets with a network
+interface. See the section on Network Interface Drivers in Chapter Five
+for more information on these conventions.
+.sh 2 "IPC (\*(lqRaw\*(rq) Interface"
+.pp
+The IPC interface to the CLNS allows direct (called \*(lqraw\*(rq)
+access to CLNP.
+This interface is intended for testing and debugging only.
+Its use results in the
+transmission of CLNP datagrams with nonstandard identification fields.
+These raw packets may be rejected by a system not employing the same
+convention. See the section on network implementation for more information
+about the conventions.
+.pp
+In order to gain access to the raw interface
+a \fIsocket\fR, with address family AF_ISO and type SOCK_RAW must be created.
+With this socket in hand,
+the system calls \fIsendto()\fR and \fIrecvfrom()\fR can be used to
+transmit and receive raw CLNP datagrams.
+.sh 3 "Sending raw datagrams"
+.pp
+The format of the \fIsendto()\fR system call is:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+cc = sendto(s, msg, len, flags, to, tolen)
+.T&
+l l l.
+int+cc,s;
+char+*msg;
+int+len,flags;
+struct sockaddr+*to;
+int+tolen;
+.TE
+\fR
+.)b
+\fIS\fR is the socket previously created. \fIMsg\fR is a pointer to
+the data for the NPDU. CLNP will prepend a header to this data before
+transmission. \fILen\fR specifies the number of bytes of data. The
+\fIflags\fR parameter is unused and should be zero. \fITo\fR specifies the
+NSAP address to be used as the destination address. The size (in bytes)
+of \fIto\fR is given in \fItolen\fR. CLNP will automatically insert
+the source address based upon the route taken by the packet. The number of
+user data bytes transmitted is returned as \fIcc\fR. See \fIsendto(2)\fR
+for more information on this system call.
+.sh 3 "Receiving raw datagrams"
+.pp
+The format of the \fIrecvfrom()\fR system call is:
+.(b
+\fC
+.TS
+tab(+);
+l s s.
+cc = recvfrom(s, buf, len, flags, from, fromlen)
+.T&
+l l l.
+int+cc,s;
+char+*buf;
+int+len,flags;
+struct sockaddr+*from;
+int+*fromlen;
+.TE
+\fR
+.)b
+When used with a CLNP raw socket \fIs\fR, \fIrecvfrom()\fR will read a
+NPDU from the CLNS. If no packet is available, the call will block.
+\fIBuf\fR specifies a buffer of \fIlen\fR bytes into which the NPDU will
+be read. The entire packet, including the header, will be read into the
+buffer. The \fIflags\fR parameter is unused, and should be zero. If
+\fIfrom\fR is non-zero, the source address of the NPDU is filled in.
+\fIFromlen\fR is a value-result parameter, initialized to the size of
+the buffer associated with \fIfrom\fR, and modified on return to
+indicate the actual size of the address stored there. The total number
+of bytes received (header and data) is returned as \fIcc\fR.
+See \fIrecvfrom(2)\fR for more information about this system call.
+.sh 1 "Connection Oriented Network Service"
+.pp
+The ARGO Connection Oriented Network Service (CONS) is not a complete
+implementation of the
+OSI network service.
+It is that subset of the OSI network service that is used
+by ARGO Transport and by ARGO CLNP.
+.\" FIGURE
+.so figs/NS_primitives.nr
+.pp
+.CF
+shows which CONS service elements are provided.
diff --git a/share/doc/iso/wisc/parts.nr b/share/doc/iso/wisc/parts.nr
new file mode 100644
index 0000000..1cd70b1
--- /dev/null
+++ b/share/doc/iso/wisc/parts.nr
@@ -0,0 +1,39 @@
+.\"$Header: parts.nr,v 1.1 88/12/05 18:10:50 nhall Exp $
+.\"$Source: /usr/argo/doc/kernel/RCS/parts.nr,v $
+.\"
+.\"
+.\" LOOK FOR ALL CASES OF 'writing' (as in, "at this writing")
+.\" to be sure you've updated everything before distributing this!
+.\"
+.\"This file uses -me and tbl macros.
+.so macros.nr
+.pn 1
+.IC
+.IF
+.(l C
+.sz 16
+Wisconsin ARGO Kernel Programmer's Guide for
+Academic Information Systems 4.3
+
+Current source: argo:/usr/argo/doc/kernel
+.sz 8
+.)l
+.so ../icon/ARGO.nr
+.he 'ARGO Kernel Programmer\'s Guide'''
+.fo '%''\*(td'
+.bp
+.\".so intro.nr
+.\".so def.nr
+.\".so trans_serv.nr
+.\".so net_serv.nr
+.\".so ipc.nr
+.\".so addr.nr
+.so trans_design.nr
+.\".so net_design.nr
+.\".so errors.nr
+.\".so debug.nr
+.\".so appendix_a.nr
+.\".so appendix_b.nr
+.\".fo ''Table of Contents''
+.bp
+.xp
diff --git a/share/doc/iso/wisc/preview b/share/doc/iso/wisc/preview
new file mode 100755
index 0000000..6069d4e
--- /dev/null
+++ b/share/doc/iso/wisc/preview
@@ -0,0 +1,7 @@
+#! /bin/csh -f
+echo $argv
+set dev=fa
+foreach m ($argv)
+ grn -P$dev $m.grn > $m.nr
+ ditroff -P$dev $m.nr
+end
diff --git a/share/doc/iso/wisc/program.nr b/share/doc/iso/wisc/program.nr
new file mode 100644
index 0000000..dfb3305
--- /dev/null
+++ b/share/doc/iso/wisc/program.nr
@@ -0,0 +1,51 @@
+.\"$Header: program.nr,v 1.1 88/12/05 18:10:57 nhall Exp $
+.\"$Source: /usr/argo/doc/kernel/RCS/program.nr,v $
+.\"
+.\"
+.\" FONT CONVENTIONS
+.\"
+.\" \fIprocedure()\fR
+.\" \fIsyscall()\fR
+.\" \fImanpage(3)\fR
+.\" \fIdata_structure_name\fR
+.\" \fC/file/or/directory/name\fR
+.\" \fC
+.\" section of code
+.\" \fR
+.\"
+.\"
+.\" LOOK FOR ALL CASES OF 'writing' (as in, "at this writing")
+.\" to be sure you've updated everything before distributing this!
+.\"
+.\"This file uses -me and tbl macros.
+.so macros.nr
+.pn 1
+.IC
+.IF
+.(l C
+.sz 16
+Wisconsin ARGO 1.0 Kernel Programmer's Guide for
+Academic Operating Systems 4.3
+.sz 8
+.)l
+.so ../icon/ARGO.nr
+.he 'ARGO 1.0 Kernel Programmer\'s Guide'''
+.fo '%''December 9, 1988'
+.bp
+.so intro.nr
+.so def.nr
+.so trans_serv.nr
+.so net_serv.nr
+.so ipc.nr
+.so addr.nr
+.so trans_design.nr
+.so net_design.nr
+.so eicon.nr
+.so errors.nr
+.so debug.nr
+.so appendix_a.nr
+.\" Leave manual pages out!
+.\".so appendix_b.nr
+.fo ''Table of Contents''
+.bp
+.xp
diff --git a/share/doc/iso/wisc/trans_design.nr b/share/doc/iso/wisc/trans_design.nr
new file mode 100644
index 0000000..6aeb54a
--- /dev/null
+++ b/share/doc/iso/wisc/trans_design.nr
@@ -0,0 +1,1466 @@
+.NC "The Design of the ARGO Transport Entity"
+.sh 1 "Protocol Hooks"
+.pp
+The design of the AOS kernel IPC support to some
+extent mandates the
+design of protocols.
+Each protocol must provide the following
+protocol hooks, which are procedures called through a
+protocol switch table
+(an array of type \fIprotosw\fR as described in
+Chapter Five).
+.ip "pr_input()" 5
+Called when data are to be passed up from a lower layer.
+.ip "pr_output()" 5
+Called when data are to be passed down from a higher layer.
+.ip "pr_init()" 5
+Called when the system is brought up.
+.ip "pr_fasttimo()" 5
+Called every 200 milliseconds by the clock functional unit.
+.ip "pr_slowtimo()" 5
+Called every 500 milliseconds by the clock functional unit.
+.ip "pr_drain()" 5
+This is meant to be called when buffer space is low.
+Each protocol is expected to provide this routine to free
+non-critical buffer space.
+This is not yet called anywhere.
+.ip "pr_ctlinput()" 5
+Used for exchanging information between
+protocols, such as notifying a transport protocol of changes
+in routing or configuration information.
+.ip "pr_ctloutput()" 5
+Supports the protocol-dependent
+\fIgetsockopt()\fR
+and
+\fIsetsockopt()\fR
+options.
+.ip "pr_usrreq()" 5
+Called by the socket code to pass along a \*(lquser request\*(rq -
+in other words a service primitive.
+This call is also used for other protocol functions.
+The functions served by the \fIpr_usrreq()\fR routine are:
+.ip " PRU_ATTACH" 10
+Creates a protocol control block and attaches it to a given socket.
+Called as a result of a \fIsocket()\fR system call.
+.ip " PRU_DISCONNECT" 10
+Called as a result of a
+\fIclose()\fR system call.
+Initiates disconnection.
+.ip " PRU_DETACH" 10
+Disassociates a protocol control block from a socket and recycles
+the buffer space used for the protocol control block.
+Called after PRU_DISCONNECT.
+.ip " PRU_SHUTDOWN" 10
+Called as a result of a
+\fIshutdown()\fR system call.
+If the protocol supports the notion of half-open connections,
+this closes the connection in one direction or both directions,
+depending on the arguments passed to
+\fIshutdown\fR.
+.ip " PRU_BIND" 10
+Gives an address to a socket.
+Called as a result of a
+\fIbind()\fR system call, and also
+when a
+socket without a bound address is used.
+In the latter case, an unused transport suffix is located and
+bound to the socket.
+.ip " PRU_LISTEN" 10
+Called as a result of a
+\fIlisten()\fR system call.
+Marks the socket as willing to queue incoming connection
+requests.
+.ip " PRU_CONNECT" 10
+Called as a result of a
+\fIconnect()\fR system call.
+Initiates a connection request.
+.ip " PRU_ACCEPT" 10
+Called as a result of an
+\fIaccept()\fR system call.
+Dequeues a pending connection request, or blocks waiting for
+a connection request to arrive.
+In the latter case, it marks the socket as willing to accept
+connections.
+.ip " PRU_RCVD" 10
+The protocol module is expected to have put incoming data
+into the socket's receive buffer, \fIso_rcv\fR.
+When a receive primitive is used
+(\fIrecv(), recvmsg(), recvfrom(),
+read(), readv(), \fRand
+\fIrecvv()\fR system calls)
+the socket code module copies data from the
+\fIso_rcv\fR to the user's
+address space.
+The protocol module may arrange to be informed each time the socket code
+does this, in which case the socket code calls \fIpr_usrreq\fR(PRU_RCVD)
+after the data were copied to the user.
+.ip " PRU_SEND" 10
+This performs the protocol-dependent part of a send primitive
+(\fIsend(), sendmsg(), sendto(), write(), writev(),
+\fRand \fIsendv()\fR system calls).
+The socket code
+(procedures \fIsendit()\fR and \fIsosend()\fR)
+moves outgoing data from the user's
+address space into a chain of \fImbufs\fR.
+The socket code takes as much data from the user as it
+determines will fit into the outgoing socket buffer, so_snd.
+It passes this much data in the form of an mbuf chain to the protocol
+via \fIpr_usrreq\fR(PRU_SEND).
+If there are more data than
+the so_snd can accommodate,
+the socket code, which is running on behalf of a user process,
+puts the user process to sleep.
+The protocol module is expected to wake up the user process when
+more room appears in so_snd.
+.ip " PRU_ABORT" 10
+Called when a socket is closed and that socket
+is accepting connections and has
+queued pending
+connection requests or
+partially open connections.
+.ip " PRU_CONTROL" 10
+Called as a result of an
+\fIioctl()\fR system call.
+.ip " PRU_SENSE" 10
+Called as a result of an
+\fIfstat()\fR system call.
+.ip " PRU_RCVOOB" 10
+Performs the work of receiving \*(lqout-of-band\*(rq data.
+The socket module has already allocated an mbuf into which
+the protocol module is expected to put the incoming
+\*(lqout-of-band\*(rq data.
+The socket code will then move the data from this mbuf
+to the user's address space.
+.ip " PRU_SENDOOB" 10
+Performs the work of sending \*(lqout-of-band\*(rq data.
+The socket module has already moved the data
+from the user's address space into a chain of mbufs,
+which it now passes to the protocol module.
+.ip " PRU_SOCKADDR" 10
+Supports the system call
+\fIgetsockname()\fR.
+Puts the socket's bound address into an mbuf.
+.ip " PRU_PEERADDR" 10
+Supports the system call
+\fIgetpeername\fR().
+Puts the peer's address into an mbuf.
+.ip " PRU_CONNECT2" 10
+This is used in the Unix domain to support pipes.
+It is not generally supported by transport protocols.
+.ip " PRU_FASTTIMO, PRU_SLOWTIMO" 10
+These are superfluous.
+None of the transport protocols uses them.
+.ip " PRU_PROTORCV, PRU_PROTOSEND" 10
+None of the transport protocols uses these.
+.ip " PRU_SENDEOT" 10
+This was added to support TP.
+This indicates that the end of the data sent in this
+send primitive should
+be marked by the protocol as the end of the TSDU.
+.sh 1 "The Interface Between the Transport Entity and Lower Layers"
+.pp
+The transport layer may run over a network layer such as IP
+or the ISO connectionless network layer,
+or it may run over a multi-purpose layer such as the service
+provided by X.25.
+X.25 is viewed as a network layer when
+TP runs over X.25, and as a
+subnetwork layer
+when IP is running over X.25.
+The software interface between data link and network layers differs
+considerably from the software interface between transport and network
+layers in AOS.
+For this reason some modification of the transport-to-lower-layer
+interface is necessary to support the suite of protocols included in
+ARGO.
+.pp
+In AOS it is assumed that the transport layer will run over one
+and only one network layer, and therefore it may call the
+network layer output procedure directly.
+In order to allow TP to run over a set of lower layers,
+all domain-specific functions have been put into a set of routines
+that are called indirectly through a domain-specific switch table.
+The primary reason for this is that the transport and network
+layers share information, mostly information pertaining to addresses.
+The protocol control blocks for different network layers
+differ, so the transport layer cannot just directly
+access the network layer's pcb.
+Similarly, a network layer may not directly access the transport
+pcb because a multitude of transport protocols can run over each
+of the network protocols.
+.pp
+To permit different network-layer protocol control blocks to coexist
+under one transport layer, all transport-dependent control
+information was put into a transport-specific protocol control block.
+A new field, \fIso_tpcb\fR,
+was added to the \fIsocket\fR structure to hold a pointer to
+the transport-layer protocol control block.
+The existing
+field \fCso_pcb\fR is used for the network layer pcb.
+.pp
+The following structure was added to allow domain-specific
+functions to be called indirectly.
+All these functions operate on a network-layer pcb.
+.pp
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct nl_protosw {
+.T&
+l l l l.
++int+nlp_afamily;+/* address family */
++int+(*nlp_putnetaddr)();+/* puts addrs in pcb */
++int+(*nlp_getnetaddr)();+/* gets addrs from pcb */
++int+(*nlp_putsufx)();+/* transp suffix -> pcb */
++int+(*nlp_getsufx)();+/* gets t-suffix */
++int+(*nlp_recycle_suffix)();+/* zeroes suffix */
++int+(*nlp_mtu)();+/* get maximum
++++transmission unit size */
++int+(*nlp_pcbbind)();+/* bind to pcb */
++int+(*nlp_pcbconn)();+/* connect */
++int+(*nlp_pcbdisc)();+/* disconnect */
++int+(*nlp_pcbdetach)();+/* detach pcb */
++int+(*nlp_pcballoc)();+/* allocate a pcb */
++int+(*nlp_output)();+/* emit packet */
++int+(*nlp_dgoutput)();+/* emit datagram */
++caddr_t+nlp_pcblist;+/* list of pcbs
++++for management
++++of connections */
+};
+.TE
+\fR
+.)b
+.lp
+The switch is based on the address family chosen when the
+\fIsocket()\fR system call is made prior to connection establishment.
+This unfortunately ties the address family to the domain,
+but the only alternative is to add an argument to the \fIsocket()\fR
+system call to let the user specify the desired network layer.
+In the case of a connection oriented environment with no multi-homing,
+it would be possible to determine which network layer is to be
+used
+from routing
+information, but to do this requires unrealistic assumptions
+about the environment.
+For these reasons, linking the address family to the network
+layer protocol is seen as the least of the evils.
+The transport suffixes are kept in the network layer's pcb
+as well as in the transport layer because
+full transport address pairs are used to identify a connection
+in the Internet domain.
+.sh 1 "The Architecture of the Transport Protocol Entity"
+.pp
+A set of protocol hooks is required
+by the AOS IPC architecture.
+These hooks are used by the protocol-independent parts of the kernel
+to gain entry to protocol-specific code.
+The protocol code can be entered in one of the following ways:
+.ip "1) " 5
+at boot time, when autoconfiguration
+initializes each protocol through
+the
+\fIpr_init()\fR
+hook,
+.ip "2) " 5
+from above, either
+a user program making a system call, through
+the \fIpr_usrreq()\fR or \fIpr_ctloutput()\fR hooks, or
+from a higher layer protocol using the
+\fIpr_output()\fR hook,
+.ip "3) " 5
+from below, a device interrupt servicing an incoming packet
+through the \fIpr_input()\fR and \fIpr_ctlinput()\fR hooks, and
+.ip "4) " 5
+from a clock interrupt through the \fIpr_slowtimo()\fR
+or the
+\fIpr_fasttimo()\fR hook.
+.\" FIGURE
+.so figs/trans_flow.nr
+.\".so figs/trans_flow.grn
+.pp
+The protocol code can be divided into
+the following modules, which are described in more detail below.
+.CF
+shows the flow of data and control
+among these modules.
+.in +5
+.ip "Timers and References:" 5
+The code executed on behalf of \fIpr_slowtimo()\fR.
+The fast timeout is not used by TP.
+.ip "Driver:" 5
+This is the finite state machine for TP.
+.ip "Input: " 5
+This is the module that decodes incoming packets,
+identifies or creates the pcb for which
+the packet is destined, and creates an "event" to
+pass to the driver.
+.ip "Output:" 5
+This is the module that creates a packet header of a given type
+with fields containing
+values that are appropriate to the connection
+on which the packet is being sent, appends data if necessary,
+and hands a packet
+to the lower layer, according to the transport-to-lower-layer
+interface.
+.ip "Send: " 5
+This module packetizes data from the outbound
+socket buffer, \fIso_snd\fR,
+handles retransmissions of packetized data, and
+drops packetized data from the retransmission queue.
+.ip "Receive:" 5
+This module reorders packets if necessary,
+depacketizes data, passes it to the socket code module,
+and determines when acknowledgments should be sent.
+.in -5
+.sh 1 "Timers and References"
+.pp
+TP identifies sockets by \fIreference numbers\fR, or
+\fIreferences\fR,
+which are \*(lqfrozen\*(rq (may not be reassigned)
+until some locally defined time after
+a connection is broken and its protocol control block
+is discarded.
+An array of \fIreference blocks\fR is maintained by TP.
+The reference number of a reference block is its
+offset in the array.
+When a reference block is in use it contains
+a pointer to the pcb for the socket to which the
+reference applies.
+.pp
+The system clock calls the \fIpr_slowtimo()\fR and
+\fIpr_fasttimo()\fR hooks for each protocol in the protocol switch table
+every 500 and 200 milliseconds, respectively.
+Each protocol handles its own timers its own way.
+The timers in TP take two forms
+- those that typically are cancelled and
+those that usually expire.
+The latter form may have more than one instantiation at any given
+time.
+The former may not.
+The two are implemented slightly
+differently for the sake of performance.
+.pp
+The timers that normally expire
+are kept in a queue, their values all relative
+to the value of the preceding timer.
+Thus all timer values are decremented by a single
+operation on the value of the first timer.
+The timer is represented by the Ecallout structure:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct Ecallout {
+.T&
+l l l l.
++int+c_time;+/* incremental time */
++int+c_func;+/* function to call */
++u_int+c_arg1;+/* argument to routine */
++u_int+c_arg2;+/* argument to routine */
++int+c_arg3;+/* argument to routine */
++struct Ecallout+*c_next;
+};
+.TE
+\fR
+.)b
+.lp
+When an Ecallout structure migrates to the head
+of the E timer list, and its \fIc_time\fR
+field is decremented to zero,
+the function stored in \fIc_func\fR is
+called, with \fIc_arg1, c_arg2\fR, and \fIc_arg3\fR
+as arguments.
+Setting and cancelling these timers
+are accomplished by a linear search and one
+insertion or deletion from the timer queue.
+This queue is linked to the
+reference block associated with a communication endpoint.
+This form is used for the reference timer
+and for the retransmission timers for data TPDUs.
+.pp
+The second form of timer, the type that
+typically is cancelled, is used for several
+timers - the inactivity timer, the sendack timer,
+and the retransmission
+timer for all types of TPDUs except data TPDUs.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct Ccallout {
+.T&
+l l l l.
++int+c_time;+/* incremental time */
++int+c_active;+/* this timer is active? */
+};
+.TE
+\fR
+.)b
+.lp
+All of these timers are stored
+directly
+in the reference block.
+These timers are decremented in one linear scan of
+the reference blocks.
+Cancelling, setting, and both
+cancelling and resetting one of these timers is accomplished by a
+single assignment to an array element.
+.sh 1 "Driver"
+.pp
+This is the finite state machine for TP.
+A connection is managed by the finite state machine (fsm).
+All events that pertain to a connection cause the
+finite state machine driver to be called.
+The driver takes two arguments - the pcb for the connection
+and an event structure.
+The event structure contains a field that discriminates
+the different types of events, and a union of
+structures that are specific to the event types.
+The driver evaluates a set of predicates based on the current
+state of the finite state machine (which is kept in the pcb) and the event type.
+The result of the predicate evaluation determines
+a set of actions to take and a state transition.
+The driver takes the actions and if they complete
+without errors, the driver makes the state transition.
+.pp
+The states, event types, predicates, actions, and state transitions are all
+specified as a \fIxebec transition file\fR.
+\fIXebec\fR is a utility that takes a human-readable description
+of a finite state machine
+and produces a set of tables and C source code for the driver.
+The driver procedure is called \fItp_driver()\fR.
+It is located in a file generated by xebec,
+\fCtp_driver.c\fR.
+For more details about xebec, see the manual page \fIxebec(1)\fR.
+.pp
+The transition file for TP is \fCtp.trans\fR,
+and it is a good place to begin a perusal of the TP
+source code.
+.sh 1 "Input"
+.pp
+This is the module that decodes an incoming packet,
+locates or creates the pcb for which
+the packet is destined, and creates an event to
+pass to the driver.
+The network layer passes a packet up to the appropriate
+transport layer by indirectly calling a transport input
+routine through the protocol switch table for the network
+domain.
+There is one protocol switch entry for TP for each domain in which
+TP will run (Internet, ISO).
+In the Internet domain, the protocol switch field \fIpr_input()\fR
+takes the value \fItpip_input()\fR.
+This procedure accepts a packet from IP, with the IP header
+still intact.
+It extracts the network addresses from the IP header,
+strips the IP header, and calls the domain-independent
+input procedure for TP,
+\fItp_input()\fR.
+\fITp_input()\fR
+decodes a TPDU.
+The multitude of options, the variable-length
+nature of the options, the semantics of the
+options, and the possible combinations of concatenated
+TPDUs make this a
+complex procedure.
+It is sensitive to changes, and from
+the point of view of software maintenance, it is a
+potential hazard.
+Because it is in the
+critical path of TP however, some compromise
+was made between maintainability and efficiency.
+Multiple copies of sections of code were avoided as much as
+possible,
+not for the sake of saving space, but rather for the sake
+of maintainability.
+Ironically,
+this detracts somewhat from the readability of the code.
+.pp
+Once a TPDU has been decoded and a pcb has been
+identified for the TPDU,
+the appropriate fields of the TPDU
+are extracted and their values are placed in
+an event structure.
+Finally, \fItp_driver()\fR is called with
+the event structure and the pcb as parameters.
+.sh 1 "Output"
+.pp
+This module creates a TPDU header of a given type
+with field values that are appropriate to the connection
+on which the TPDU is being sent, appends data if necessary,
+and hands a TPDU
+to the lower layer according to the transport-to-lower-layer
+interface.
+Whenever a TPDU is to be sent to the peer or prospective peer,
+the function \fItp_emit()\fR
+is called, passing as arguments the pcb, a TPDU type, and several miscellaneous
+other type-specific arguments, possibly including some data.
+The data are in the form of an mbuf chain.
+\fITp_emit()\fR prepends to the data an mbuf containing a TP header,
+fills in the fields of the header according to the parameters
+given, performs the checksum if appropriate, and
+calls a domain-specific output routine.
+For the Internet domain, this output routine is
+\fItpip_output()\fR, which takes
+as arguments the mbuf chain representing the TPDU,
+and a network level pcb.
+Some protocol errors cannot be associated with
+a connection
+but require that TP issue
+an ER TPDU or a DR TPDU.
+When these errors occur the routine
+\fItp_error_emit()\fR is called.
+This procedure creates the appropriate type of TPDU
+and passes it to a domain-dependent routine for transmitting datagrams.
+In the Internet domain,
+\fItpip_output_dg()\fR is called.
+This takes as arguments an mbuf chain representing the TPDU,
+a source network address, and a destination network address.
+.sh 1 "Send"
+.\" FIGURE
+.so figs/mbufsnd.nr
+.\".so figs/mbufsnd.grn
+.pp
+This module packetizes data from the outbound
+socket buffer, \fIso_snd\fR,
+handles retransmissions of packetized data, and
+drops packetized data from the retransmission queue.
+The major routine in this module is \fItp_send()\fR, which
+takes a range of sequence numbers as arguments.
+For each sequence number in the range,
+it packetizes an appropriate amount
+of outbound data, and places the resulting TPDU on
+a retransmission control queue subject to the
+constraints imposed by the rules of expedited data,
+maximum packet sizes, and end-of-TSDU markers.
+.pp
+The most complicating factor is that of managing
+expedited data.
+A normal datum may not be sent (for its first time) before the
+acknowledgment of any expedited datum
+that was received from the user after the
+normal datum was received.
+In order to enforce this rule,
+each TPDU must be marked in some way
+so that it will be known which expedited datum
+must be delivered and acknowledged by the peer before this TPDU may be transmitted
+for the first time.
+Markers are placed in \fIso_snd\fR
+when an
+outgoing expedited datum arrives from the user.
+A marker is an mbuf structure with an \fIm_len\fR
+of zero, but with the data area nevertheless containing
+the sequence number of an expedited data TPDU.
+The \fIm_type\fR of a marker is a new type, MT_XPD.
+.pp
+\fITp_send()\fR stops packetizing data when it encounters a marker
+for an unacknowledged expedited datum.
+If it encounters a marker for an expedited TPDU that has already
+been acknowledged, the marker is jettisoned.
+.CF
+illustrates the structure of the sending socket buffer used
+for normal data.
+.pp
+When \fItp_send()\fR moves data from mbufs on \fIso_snd\fR to the retransmission
+control queue, it needs to know
+how many octets of data can be placed in each TPDU.
+The appropriate amount depends on, among other things,
+the maximum transmission unit of the network layer
+on the route the packet will take.
+To determine the maximum transmission unit,
+TP queries the network layer through
+the domain-dependent switch table's field, \fInlp_mtu\fR.
+In the Internet domain, this resolves to \fItp_inmtu()\fR.
+The header sizes for the network and transport layers
+also affect the amount of data that can go into a packet,
+and these sizes depend on the connection's characteristics.
+.pp
+Once the maximum amount of data per TPDU is determined,
+\fItp_send()\fR can pull this amount off the \fIso_snd\fR queue to form
+a TPDU,
+assign a TPDU sequence number,
+and place the new TPDU on the
+retransmission control queue.
+The retransmission control queue is a list of mbuf chains.
+Each mbuf chain represents one TPDU, preceded by an
+\fIrtc structure\fR:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct tp_rtc {
+.T&
+l l l l.
++struct tp_rtc+*tprt_next;+/* next rtc struct in list */
++SeqNum+tprt_seq;+/* seq # of this TPDU */
++int+tprt_eot;+/* end of TSDU? */
++int+tprt_octets;+/* # octets in this TPDU */
++struct mbuf+*tprt_data;+/* ptr to the octets of data */
+.\"/* Performance measurement info: */
+.\"int tprt_window; /* in which call to tp_send() was
+.\" * this TPDU formed?
+.\" */
+.\"struct timeval tprt_sess_time; /* time session received the
+.\" * majority of the data for this packet on send;
+.\" * on recv, this is the time it's given to session
+.\" */
+.\"struct timeval tprt_net_time; /* time first copy was given to net layer
+.\" * on send; on receive it's the time received from
+.\" * the network
+.\" */
+};
+.TE
+\fR
+.)b
+.lp
+Once TPDUs are on the retransmission control queue,
+they are retransmitted or dropped by the actions
+of timers.
+The procedure \fItp_sbdrop()\fR
+removes the TPDUs from the retransmission queue.
+It takes a sequence number as an argument and drops
+all TPDUs up to and including the TPDU with that sequence number.
+.pp
+When an AK TPDU arrives, the values from
+its credit and sequence number fields
+are passed to \fItp_goodack()\fR, which
+determines whether or not the AK brought any news with it,
+and therefore whether TP can send more data
+or expedited data.
+If this AK acknowledges something heretofore unacknowledged,
+\fItp_goodack()\fR drops the appropriate TPDU(s) from the retransmission
+control list, computes the smoothed average round trip time
+and standard deviation of the round trip time,
+and updates
+the retransmission timer based on these statistics.
+It sets a flag in the pcb if the TP entity is obliged to
+send the flow control confirmation parameter on its next
+AK TPDU.
+\fITp_goodack()\fR returns true if the AK brought some news with it,
+either with respect to a change in credit or with respect to
+new acknowledgments.
+.pp
+The function \fItp_goodXack()\fR is called when an XAK TPDU
+arrives.
+It takes the XAK sequence number as an argument and
+determines if the XAK acknowledges the last XPD TPDU sent.
+If so, it drops the expedited data from the outgoing
+expedited data buffer.
+By its definition in the TP specification,
+the expedited data stream has a window
+of size 1,
+that is,
+only one expedited datum (packet) can be buffered
+at a time.
+\fITp_goodXack()\fR returns true if the XAK acknowledged
+the last XPD TPDU sent and the data were dropped,
+and it returns false if the acknowledgment caused no action to be taken.
+.\" NEXT FIGURE
+.so figs/mbufrcv.nr
+.\".so figs/mbufrcv.grn
+.sh 1 "Receive"
+.pp
+This module reorders incoming TPDUs if necessary,
+depacketizes data, passes it to the socket code module,
+and determines when acknowledgments should be sent.
+The function
+\fItp_stash()\fR
+takes a DT TPDU as an argument, and if the TPDU is not in
+sequence, it saves the TPDU in a \fItp_rtc\fR structure in
+a list, with the TPDUs
+kept in order.
+When the next expected TPDU arrives, the
+list of out-of-order TPDUs is scanned for
+more TPDUs in sequence, updating
+a field in the pcb, \fItp_rcvnxt\fR which
+always contains the sequence
+number of
+the next expected TPDU.
+If an acknowledgment is to be generated
+at any time, the value of tp_rcvnxt goes into the
+\fIYR-TU-NR\fR\** field of the acknowledgment TPDU.
+.(f
+\**
+This is the name used in ISO 8073 for the field
+which indicates the sequence number of the next expected DT TPDU.
+.)f
+.pp
+\fITp_stash()\fR returns true if an acknowledgment needs to be generated
+immediately, false if not.
+The acknowledgment strategy is therefore implemented in this routine.
+Acknowledgments may be generated for one or more of several reasons,
+listed below.
+\fITp_stash()\fR increments a counter for each of these reasons
+for which an acknowledgment is generated, and a counter for TPDUs
+that are not acknowledged immediately.
+.ip "ACK_STRAT_EACH" 5
+The acknowledgment strategy in use calls for acknowledging each
+data packet with an AK TPDU.
+.ip "ACK_STRAT_FULLWIN" 5
+The acknowledgment strategy in use calls for acknowledging
+upon receiving the DT TPDU that represents the upper window
+edge of the last advertised window.
+.ip "ACK_DUP" 5
+A duplicate data TPDU was received.
+.ip "ACK_REORDER" 5
+A DT TPDU arrived in the window but out of order.
+.ip "ACK_EOT" 5
+A DT TPDU arrived, and it had the end-of-TSDU flag set.
+.pp
+Upon receipt of a DT TPDU that is in order, and upon reordering
+DT TPDUs,
+\fItp_stash()\fR
+places the TSDUs into the socket's receive
+socket buffer, \fIso->so_rcv\fR in mbuf chains, with
+TSDUs delimited by mbufs of the \fIm_type\fR MT_EOT,
+which is a new type with the ARGO kernel.
+.CF
+illustrates the structure of the receiving socket buffer used
+for normal data.
+.pp
+A separate socket buffer, \fItpcb->tp_Xrcv\fR,
+is used for
+buffering expedited data.
+Only one expedited data packet may reside in this buffer at a time
+because the TP standard limits the size of the window on expedited flow
+to be 1.
+This means the data structures are straightforward;
+there is no need to distinguish between separate TSDUs in this socket buffer.
+.pp
+Credit is determined
+by dividing the total amount of available
+space in the receive buffer
+by the negotiated maximum TPDU size.
+TP can often offer a larger credit than this if it uses
+an average of the measured actual TPDU sizes.
+This strategy was once an option in the ARGO kernel,
+but it was removed because unless the actual TPDU size
+is constant, it leads to reneging of credit,
+retransmissions, and decreased performance.
+It does not work well when there is any fluctuation in the sizes
+of TPDUs and it carries the penalty of lengthening the critical path
+of the TP entity.
+.sh 1 "Major Data Structures and Types"
+.pp
+In addition to the types commonly used in the kernel,
+such as
+.(b
+\fC
+.TS
+tab(+);
+l l l l.
+ +typedef+unsigned char+u_char;
+ +typedef+unsigned int+u_int;
+ +typedef+unsigned short+u_short;
+.TE
+\fR
+.)b
+TP uses the following types:
+.(b
+\fC
+.TS
+tab(+);
+l l l l.
+ +typedef+unsigned int+SeqNum;
+ +typedef+unsigned short+RefNum;
+ +typedef+int+ProtoHook;
+.TE
+\fR
+.)b
+.pp
+Sequence numbers can be either 7 or 31 bits.
+An unsigned integer is used in all cases, and the proper type
+of arithmetic is performed with bit masks.
+Reference numbers are 16 bits.
+ProtoHook is the type of the procedures that are in switch
+tables, which,
+although they are not functions,
+are declared \fIint\fR rather than \fIvoid\fR
+to be consistent with the rest of the kernel.
+.pp
+The following structures are fundamental
+types used throughout TP,
+in addition to those already described in the
+section,
+"The Design of the Transport Entity".
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct tp_ref {
+.T&
+l l l l.
++u_char+tpr_state;+/* REF_FROZEN...*/
++struct Ccallout+tpr_callout[N_CTIMERS];+/* C timers */
++struct Ecallout+tpr_calltodo;+/* E timers list */
++struct tp_pcb+*tpr_pcb;+/* --> PCB */
+};
+.TE
+\fR
+.)b
+.lp
+The reference structure is logically a part of the protocol
+control block and it is linked to a pcb, but it may outlive
+a pcb.
+When a connection is dissolved, the pcb may be recycled
+but the reference structure must remain until the reference
+timer goes off.
+The field \fItpr_state\fR takes the values
+REF_FROZEN (a reference timer is ticking),
+REF_OPEN (in use, has timers and an associated pcb),
+REF_OPENING (has a pcb but no timers), and
+REF_FREE (free to reallocate).
+.pp
+The TP protocol control block is too large to fit into
+one mbuf structure so it comprises two structures
+linked together, the
+\fItp_pcb\fR structure and the
+\fItp_pcb_aux\fR structure.
+The \fItp_pcb_aux\fR structure contains
+items that are used less frequently than those in
+the former structure, since each access to these
+items requires a second pointer dereference.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct tp_pcb_aux {
+.T&
+l l l s.
+ +struct sockbuf+tpa_Xsnd;+/* for expedited data */
++struct sockbuf+tpa_Xrcv;+/* for expedited data */
++u_char +tpa_vers;+/* protocol version */
++u_char +tpa_peer_acktime;+/* to compute DT TPDU
++++retrans timer value */
++SeqNum+tpa_Xsndnxt;+/* seq # of
++++next XPD to send */
++SeqNum+tpa_Xuna;+/* seq # of
++++unacked XPD */
++SeqNum+tpa_Xrcvnxt;+/* next XPD seq #
++++expect to recv */
++/* addressing */
++u_short+tpa_domain;+/* domain AF_ISO,...*/
++u_short+tpa_fsuffixlen;+/* foreign suffix */
++u_char+tpa_fsuffix[MAX_TSAP_SEL_LEN];+
++u_short+tpa_lsuffixlen;+/* local suffix */
++u_char+tpa_lsuffix[MAX_TSAP_SEL_LEN];+
+.T&
+l s s s.
+ +/* AK subsequencing */
+.T&
+l l l s.
+ +u_short+tpa_s_subseq;+/* next subseq to send */
++u_short+tpa_r_subseq;+/* highest recv subseq */
+};
+.TE
+\fR
+.)b
+.pp
+The major portion of the protocol control block is in the
+\fItp_pcb\fR structure:
+.(b
+\fC
+.TS
+tab(%);
+l s s s.
+struct tp_pcb {
+.\" ***************************************
+.T&
+l l l l.
+.\" The next line sets the spacing for the table: 1+3 17+3 17+3 13+3
+ % % %
+.\"456789 123456789- 123456789 123456-789 123456789 1234567890
+.\"
+ %struct tp_ref%*tp_refp;%
+.T&
+l l l s.
+%%/* reference structure */%
+.\" ***************************************
+.T&
+l l l l.
+ %struct tp_pcb_aux%*tp_aux;%
+.T&
+l l l s.
+ %%/*rest of tpcb (auxiliary struct)*/%
+.\" ***************************************
+.T&
+l l l l.
+ %caddr_t%tp_npcb;%/* to ll pcb */
+%struct nl_protosw%*tp_nlproto;%
+.T&
+l l l s.
+ % %/* domain-dependent routines */%
+.\" ***************************************
+.T&
+l l l l.
+ %struct socket%*tp_sock;%/* back ptr */
+.\" ***************************************
+.T&
+l s s s.
+
+/* local and foreign reference numbers: */
+.T&
+l l l l.
+ %RefNum%tp_lref;%
+%RefNum%tp_fref;%
+.\" ***************************************
+.T&
+l s s s.
+.\"456789 123456789 123456789 123456789 123456789 1234567890
+
+/* Stuff for sequence space arithmetic:
+ * Maintaining 2 sequence spaces is a pain so we set these
+ * values once at connection establishment time. Sequence
+ * number arithmetic is a set of macros which uses these.
+ * Sequence numbers are stored as 32 bits.
+ * tp_seqmask tells which of the 32 bits is used.
+ * tp_seqbit is the lsb that is not used. When set,
+ * it indicates wraparound has occurred.
+ * tp_seqhalf is the value that is half the sequence space.
+ * (or half plus one).
+ */
+.T&
+l l l l.
+%u_int%tp_seqmask;%/* mask */
+%u_int%tp_seqbit;%/* wraparound */
+%u_int%tp_seqhalf;%/* half space */
+.\" ***************************************
+.T&
+l s s s.
+
+/* flags: values are defined in tp_user.h.
+ * Here we keep such info as which options
+ * are in use: checksum, extended format,
+ * flow control in class 2, etc.
+ * See tp(4p) man page.
+ */
+.\" ***************************************
+.T&
+l l l l.
+ %u_short%tp_state;%/* fsm */
+%short%tp_retrans;%
+.T&
+l l l s.
+ % % /* # times to retransmit */%
+.\" ***************************************
+.T&
+l s s s.
+
+/* credit & sequencing info for SENDING: */
+.T&
+l l l s.
+ %u_short%tp_fcredit;%
+ % %/* remote real window */%
+ %u_short%tp_cong_win;%
+ % %/* remote congestion window */%
+.\" ***************************************
+%SeqNum%tp_snduna;%
+.T&
+l l l s.
+ % %/* seq # of lowest unacked DT */%
+.\" ***************************************
+.T&
+l l l l.
+ %struct tp_rtc %*tp_snduna_rtc;%
+.T&
+l l l s.
+ % %/* ptr to mbufs containing lowest%
+%% * unacked TPDUs sent so far%
+%% */%
+.\" ***************************************
+.T&
+l l l l.
+ %SeqNum%tp_sndhiwat;%
+.T&
+l l l s.
+ % %/* highest DT sent yet */%
+.\" ***************************************
+.T&
+l l l l.
+ %struct tp_rtc%*tp_sndhiwat_rtc;%
+.T&
+l l l s.
+ % %/* ptr to mbufs containing the last%
+%% * DT sent - this is the last item %
+%% * on the list that starts%
+%% * at tp_snduna_rtc%
+%% */%
+.\" ***************************************
+.T&
+l l l l.
+ %int %tp_Nwindow;%/* for perf. measmt */
+.\" ***************************************
+.T&
+l s s s.
+
+/* credit & sequencing info for RECEIVING: */
+.\" ***************************************
+.T&
+l l l s.
+ %SeqNum%tp_sent_lcdt;%
+ %%/* cdt according to last AK sent */%
+ %SeqNum%tp_sent_uwe;%
+ % %/* upper window edge, according to%
+%% * the last AK sent %
+%% */%
+ %SeqNum%tp_sent_rcvnxt;%
+ % %/* rcvnxt, according to%
+%% * the last AK sent%
+%% */%
+.\" ***************************************
+.T&
+l l l l.
+ %short%tp_lcredit;%/* local */
+.\" ***************************************
+.T&
+l l l l.
+ %SeqNum%tp_rcvnxt;%
+.T&
+l l l s.
+ % %/* next DT seq# we expect to recv */%
+.\" ***************************************
+.T&
+l l l l.
+ %struct tp_rtc%*tp_rcvnxt_rtc;%
+.T&
+l l l s.
+ % %/* ptr to mbufs containing unacked %
+%% * DTs received out of order, and %
+%% * which we haven't acknowledged%
+%% */%
+.\" ***************************************
+.TE
+.TS
+tab(%);
+l s s s.
+/* Items kept in the aux structure: */
+
+.\" ***************************************
+.T&
+l s s l.
+#define tp_vers%tp_aux->tpa_vers
+#define tp_peer_acktime%tp_aux->tpa_peer_acktime
+#define tp_Xsnd%tp_aux->tpa_Xsnd
+#define tp_Xrcv%tp_aux->tpa_Xrcv
+#define tp_Xrcvnxt%tp_aux->tpa_Xrcvnxt
+#define tp_Xsndnxt%tp_aux->tpa_Xsndnxt
+#define tp_Xuna%tp_aux->tpa_Xuna
+#define tp_domain%tp_aux->tpa_domain
+#define tp_fsuffixlen%tp_aux->tpa_fsuffixlen
+#define tp_fsuffix%tp_aux->tpa_fsuffix
+#define tp_lsuffixlen%tp_aux->tpa_lsuffixlen
+#define tp_lsuffix%tp_aux->tpa_lsuffix
+#define tp_s_subseq%tp_aux->tpa_s_subseq
+#define tp_r_subseq%tp_aux->tpa_r_subseq
+.\" ***************************************
+.T&
+l s s s.
+ % % %
+/* parameters per-connection controllable by user: */
+.\" ***************************************
+.T&
+l l l l.
+ %struct%tp_conn_param%_tp_param;
+ % % %
+.\" ***************************************
+.T&
+l s s l.
+#define tp_Nretrans%_tp_param.p_Nretrans
+#define tp_dr_ticks%_tp_param.p_dr_ticks
+#define tp_cc_ticks%_tp_param.p_cc_ticks
+#define tp_dt_ticks%_tp_param.p_dt_ticks
+#define tp_xpd_ticks%_tp_param.p_x_ticks
+#define tp_cr_ticks%_tp_param.p_cr_ticks
+#define tp_keepalive_ticks%_tp_param.p_keepalive_ticks
+#define tp_sendack_ticks%_tp_param.p_sendack_ticks
+#define tp_refer_ticks%_tp_param.p_ref_ticks
+#define tp_inact_ticks%_tp_param.p_inact_ticks
+#define tp_xtd_format%_tp_param.p_xtd_format
+#define tp_xpd_service%_tp_param.p_xpd_service
+#define tp_ack_strat%_tp_param.p_ack_strat
+#define tp_rx_strat%_tp_param.p_rx_strat
+#define tp_use_checksum%_tp_param.p_use_checksum
+#define tp_tpdusize%_tp_param.p_tpdusize
+#define tp_class%_tp_param.p_class
+#define tp_winsize%_tp_param.p_winsize
+#define tp_netservice%_tp_param.p_netservice
+#define tp_no_disc_indications%_tp_param.p_no_disc_indications
+#define tp_dont_change_params%_tp_param.p_dont_change_params
+.\" ***************************************
+.TE
+.\" ***************************************
+.\" ***************************************
+.\" ***************************************
+.TS
+tab(%);
+l l l l.
+.\" The next line sets the spacing for the table: 1+3 17+3 17+3 13+3
+.\"456789 123456789- 123456789 123456-789 123456789 1234567890
+.\"
+.T&
+l l l s.
+ %%/* log2(the negotiated max size) */%
+.T&
+l l l l.
+ %int%tp_l_tpdusize;%/* # bytes */
+.\" ***************************************
+ %struct timeval%tp_rtt;%
+.T&
+l l l s.
+ % %/* smoothed avg round-trip time */%
+ %struct timeval%tp_rtv;%
+ % %/* std deviation of round-trip time */%
+%struct timeval%tp_rttemit[ TP_RTT_NUM + 1 ];%
+%%/* times that the last TP_RTT_NUM %
+%% * DT_TPDUs were transmitted %
+%% */%
+.\" ***************************************
+ %unsigned % %
+% tp_sendfcc:1,%/* shall next ack %
+% %include flow control conf. param? */%
+.\" ***************************************
+.T&
+l l l s.
+ % tp_trace:1,%/* is this pcb being traced?%
+%% * (not used yet) %
+%% */%
+.\" ***************************************
+% tp_perf_on:1,%/* statistics being kept? */%
+.\" ***************************************
+% tp_reneged:1,%/* have we reneged on credit%
+%% * since the last AK TPDU was sent? %
+%% */%
+% tp_decbit:4,%/* congestion experienced? */%
+% tp_flags:8,%/* see #defines below */%
+.\" ***************************************
+% tp_unused:16;%%
+.T&
+l s s l.
+#define TPF_XPD_PRESENT%TPFLAG_XPD_PRESENT
+#define TPF_NLQOS_PDN%TPFLAG_NLQOS_PDN
+#define TPF_PEER_ON_SAMENET%TPFLAG_PEER_ON_SAMENET
+%%%
+.\" ***************************************
+.T&
+l l l l.
+ %struct tp_pmeas%*tp_p_meas;%
+.T&
+l l l s.
+ % %/* ptr to mbuf to hold the perf.%
+%% * statistics structure %
+%% */%
+.\" ***************************************
+};
+.TE
+\fR
+.\"
+.\" end of tpcb structure (thank you)
+.\"
+.)b
+.fi
+.sh 1 "Sequence Number Arithmetic"
+.pp
+Sequence numbers in TP can be either 7 bits
+(\*(lqnormal format\*(rq)
+or 31 bits
+(\*(lqextended format\*(rq).
+Sequence numbers are unsigned integers,
+regardless of their format.
+Three fields are kept in the pcb to manage the sequence
+number arithmetic:
+.(b
+\fC
+.TS
+tab(+);
+l l l l.
+ +u_int+tp_seqmask;+/* mask for seq space */
+ +u_int+tp_seqbit;+/* bit for seq # wraparound */
+ +u_int+tp_seqhalf;+/* half the seq space */
+.TE
+\fR
+.)b
+.lp
+\fITp_seqmask\fR
+is a bit mask indicating which bits are legitimate
+for a sequence number of either format.
+It takes the value 0x7f if 7-bit sequence numbers are in use,
+and 0x7fffffff if 31-bit sequence numbers are in use.
+\fITp_seqbit\fR
+is the bit that becomes set when a sequence number wraps around
+while being incremented.
+Its value is 0x80 for normal format, 0x80000000 for extended format.
+\fITp_seqhalf\fR
+takes the value which is in the middle of the sequence space,
+0x40 for normal format,
+and
+0x40000000 for extended format.
+.(b
+.nf
+The macro
+.fi
+\fC
+.TS
+tab(+);
+l l l l.
+ SEQ(tpcb, x)
+.TE
+\fR
+.)b
+.lp
+extracts a sequence number from the location
+in which it is stored.
+.pp
+The macros
+.(b
+\fC
+.TS
+tab(+);
+l l s s l.
+ +SEQ_GT(tpcb, seq, t)+is seq > t?
+ +SEQ_GEQ(tpcb, seq, t)+is seq >= t?
+ +SEQ_LT(tpcb, seq, t)+is seq < t?
+ +SEQ_LEQ(tpcb, seq, t)+is seq <= t?
+ +SEQ_INC(tpcb, seq)+seq\+\+
+ +SEQ_DEC(tpcb, seq)+seq--
+ +SEQ_SUB(tpcb, seq, amt)+seq -= amt
+ +SEQ_ADD(tpcb, seq, amt)+seq \+= amt
+.TE
+\fR
+.)b
+.lp
+perform the indicated comparisons and arithmetic
+on their arguments.
+.pp
+An example of how these macros
+are used is as follows.
+To determine if a sequence
+number \fIseq\fR is in a receive window
+bounded by
+\fIlwe\fR and \fIuwe\fR,
+we define the
+macro
+.(b
+\fC
+.TS
+tab(+);
+l l.
+#define+IN_RWINDOW(tpcb, seq, lwe, uwe)\\
++( SEQ_GEQ(tpcb, seq, lwe) && SEQ_LT(tpcb, seq, uwe) )
+.TE
+\fR
+.)b
+.sh 1 "TP Implementation Options"
+.pp
+The transport protocol specification leaves several
+things to the discretion of the implementor,
+some of which may affect the performance
+of individual connections and
+aggregate performance.
+Wherever different strategies are likely to favor
+the performance of
+individual connections to the detriment of aggregate performance
+or vice versa, the
+various strategies are under the control of options via the
+\fIgetsockopt()\fR and
+\fIsetsockopt()\fR system calls (see the manual pages
+\fIgetsockopt(2)\fR,
+\fIsetsockopt(2)\fR
+and
+\fItp(4p)\fR
+for details).
+In some cases the preferred strategies differ for the different
+subnetworks, so the strategies chosen will be determined
+by the subnetwork in use.
+.sh 2 "TPDU size"
+.pp
+The limitation of the maximum TPDU size to a power of two is
+unfortunate in the LAN environment.
+For example, if the maximum NSDU size is around 1500, as in the case of an
+Ethernet,
+using a maximum TPDU size of 1024 reduces
+the possible throughput by approximately 30%.
+TP negotiates a maximum TPDU size of 2048 and
+generates TPDUs of size around 1500.
+Obviously this works well only when the peer is known to be
+using the same scheme (so that the peer
+doesn't send TPDUs of size 2048 and cause its
+network layer to fragment the TPDUs).
+This is likely to be the case in a LAN where
+all protocol entities are under the same administrative
+control.
+The maximum TPDU size negotiated is under the control of the user,
+so
+it is possible to prevent this scheme from being used
+by default
+when the peer is not on the same LAN, by
+setting the \fItp.tpdusize\fR parameter in the ARGO directory service
+file to
+something less than the network's maximum transmission
+unit.
+.\"***********************************************************
+.sh 2 "Congestion Window Strategy"
+.pp
+The congestion window strategy from the
+DoD Internet
+was adapted for use with TP.
+The strategy is intended to minimize the
+adverse effect
+of transport's retransmission on an
+already congested network.
+.pp
+A TP entity keeps two notions of the peer's window:
+the real window, which is that advertised by the peer
+in AK TPDUs, and the congestion window, which is a locally
+controlled window.
+TP uses the smaller of the two windows when transmitting.
+The congestion window starts small, which keeps a
+new connection from overloading the network with a sudden
+burst of packets
+immediately after connection establishment.
+This is called \fIslow start\fR.
+For each successful acknowledgment received, the congestion
+window grows by one, until eventually the real window
+is the one in use.
+If a retransmission timer expires, the congestion window
+is reset to size one.
+.pp
+The congestion window strategy is used for class 4 unless
+the transport user requests that it not be used.
+The slow start strategy is used for traffic over a PDN
+unless
+the transport user requests that it not be used.
+Slow start is not used for traffic over a LAN unless
+its use is requested by the transport user.
+.\"***********************************************************
+.sh 2 "Retransmission strategies"
+.pp
+A retransmission timer is invoked for each set of DT TPDUs
+sent in one send operation (call to \fItp_send()\fR).
+This set of packets is called the \fIsend window\fR for the purpose
+of this discussion.
+.pp
+The number of TPDUs
+in a send window
+depends on the remote credit and the amount of data
+in the local send buffers.
+When a retransmission timer goes off, the lower
+window edge
+is reevaluated but the upper window edge is not reevaluated.
+.pp
+There are several retransmission strategies implemented in
+ARGO TP.
+The choice of strategies is the user's, and is made with the
+\fIsetsockopt()\fR system call.
+The strategies are summarized here:
+.ip "Retransmit LWE TPDU only:" 5
+Only the TPDU representing the new lower window edge
+is retransmitted.
+This is the default retransmission strategy.
+.ip "Retransmit whole send window:" 5
+Retransmission begins with the new lower window edge
+and continues up to the old upper window edge.
+.pp
+The value of the data retransmission timer
+adapts to the average round trip time and the standard deviation of
+the round trip time.
+A round trip time is the time that passes between
+the moment of a packet's first transmission and
+the moment it is first acknowledged.
+The average round trip time
+is kept by the sending side of TP, using
+a formula for
+smoothing the average:
+.(b
+\fC
+.TS
+tab(+);
+l l l l.
+#define+TP_RTT_ALPHA+3
+#define+TP_RTV_ALPHA+2
++++
+#define+SMOOTH(alpha, old, new) \\
++(((new-old) >> alpha ) \+ (old) )
+.TE
+\fR
+.)b
+.lp
+The times included in the average are chosen as follows.
+The time of
+each packet's initial transmission is kept (for the last
+\fIN\fR packets, where \fIN\fR is a defined constant).
+When an AK TPDU arrives, ARGO TP subtracts the initial transmission
+time for the lowest unacknowledged sequence number that was
+acknowledged by this AK TPDU from the current time,
+and applies the resulting time to the average.
+Hence, not all packets are included in this average,
+which is as it should be since
+the purpose of this measurement is
+to find a good value for the retransmission timer.
+.pp
+Each time part of a window is retransmitted,
+the retransmission timer for that window is increased.
+This does not affect the retransmission timers for other windows.
+.\"***********************************************************
+.sh 2 "Acknowledgment strategies"
+.pp
+The transport protocol specification
+requires acknowledgments to be sent immediately
+upon receipt
+of CC TPDUs (in class 4), XPD TPDUs, and DT TPDUs containing an
+EOT marker, and at other times as required for flow control;
+otherwise acknowledgments may be delayed.
+In addition to the times when an acknowledgment is required,
+ARGO TP transmits an AK TPDU whenever the user receives some data,
+thereby increasing the size of the window.
+For those times when
+immediate acknowledgment is optional,
+ARGO TP offers two acknowledgment strategies:
+.ip " Acknowledge each TPDU" 10
+Upon receipt of a DT TPDU an AK TPDU is sent.
+.ip " Acknowledge full window" 10
+Acknowledgment is issued
+upon receipt of enough data to
+consume the last advertised credit.
+.pp
+The latter strategy
+requires a timer to trigger an acknowledgment
+in case the peer doesn't send the entire window
+quickly.
+This timer is called the
+\fIsendack timer\fR.
+The upper bound on the value of this timer
+is called the \fIlocal acknowledgment time\fR.
+The local acknowledgment time may be "advertised" to the
+peer during connection establishment, and the
+peer may choose to use this value to
+adjust its retransmission timers.
+The ARGO TP entity advertises its local acknowledgment time
+on a CR TPDU, but it is not
+constrained by
+the remote acknowledge time, should the peer
+advertise it.
+Instead,
+ARGO TP adapts its sendack timer
+to the behavior of the connection.
+.pp
+Under the assumption that the round trip time is
+often
+symmetric,
+and lacking
+a method to measure
+the round trip time in the other direction,
+ARGO TP uses the measured average round trip time
+to adjust the sendack timer.
+.pp
+The choice of strategies is made with the
+\fIsetsockopt()\fR system call.
+The default strategy is
+to
+delay acknowledgments until the most recently advertised window is filled.
diff --git a/share/doc/iso/wisc/trans_serv.nr b/share/doc/iso/wisc/trans_serv.nr
new file mode 100644
index 0000000..462186e
--- /dev/null
+++ b/share/doc/iso/wisc/trans_serv.nr
@@ -0,0 +1,692 @@
+.NC "Transport Service Interface"
+.sh 1 "General"
+.pp
+It is assumed that the reader is acquainted with the
+set of system calls and library routines that
+compose the
+Berkeley
+Unix interprocess communication service (IPC).
+To every extent possible
+the ARGO transport service is provided by the same IPC mechanisms
+that support
+the transport-level services
+included in the
+AOS distribution.
+In some instances, the interface
+provided by AOS does not support
+the services required by ISO 8073,
+so system calls were added to support these services.
+It is felt that this is superior to modifying
+existing system calls, in order to avoid the
+recoding of existing Unix utilities.
+.pp
+What follows is a description of the system calls
+that are used to provide the transport service.
+According to Unix custom,
+the return value of a system call is 0
+if the call succeeds and -1 if
+the call fails for any reason.
+In the latter case,
+the global variable
+\fIerrno\fR contains more information
+about the error that caused the failure.
+In the descriptions of all the system calls for which
+this custom is followed,
+the return value is named
+\fIstatus\fR.
+.sh 1 "Connection establishment"
+.pp
+Establishing a TP connection is similar to
+establishing a connection using any other
+transport protocol supported by Unix.
+The same system calls are used, and the passive open
+is required.
+Some of the parameters to the system calls differ.
+.pp
+The following call creates a communication endpoint called a
+\fIsocket\fR.
+It returns
+a positive integer called a
+\fIsocket descriptor\fR,
+which
+will be a parameter in all communication primitives.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+s = socket( af, type, protocol )
+.T&
+l l l.
+ +int+s,af,type,protocol;
+.TE
+\fR
+.)b
+.pp
+The \fIaf\fR parameter describes the format of addresses
+used in this communication.
+Existing formats include AF_INET (DoD Internet addresses),
+AF_PUP (Xerox PUP-I Internet addresses), and AF_UNIX
+(addresses are Unix file names, for intra-machine IPC only).
+TP runs in either the Internet domain or the ISO domain (AF_ISO).
+When using the Internet domain, the network layer is the DoD Internet IP
+with Internet-style addresses. The ISO domain uses the ISO
+network service and ISO-style addresses\**.
+.(f
+\**ISO/DP 8348/DAD2 Addendum to the Network
+Service Definition Covering Network Layer Addressing.
+.)f
+Regardless of the address family used, an address takes the
+general form,
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr {
+.T&
+l l l l.
+ +short+sa_family;+/* address family */
+ +char+sa_data[14];+/* space for an address */
+}+
+.TE
+\fR
+.)b
+.sp 1
+When viewed as an Internet address, it takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr_in {
+.T&
+l l l l.
+ +short+sin_family;+/* address family */
+ +u_short+sin_port;+/* internet port */
+ +struct in_addr+sin_addr;+/* network addr A.B.C.D */
+ +char+sin_zero[8];+/* unused */
+}
+.TE
+\fR
+.)b
+.sp 1
+When viewed as an ISO address, it takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+struct sockaddr_iso {
+.T&
+l l l l.
+ +short+siso_family;+/* address family */
+ +u_short+siso_tsuffix;+/* transport suffix */
+ +struct iso_addr+siso_addr;+/* ISO NSAP addr */
+ +char+siso_zero[2];+/* unused */
+}
+.TE
+\fR
+.)b
+The address described by a \fIsockaddr_iso\fR structure
+is a TSAP-address (transport service access point address).
+It is made of an NSAP-address (network service access point address)
+and a TSAP selector (also called a transport suffix or
+transport selector, hereafter called a TSEL).
+The structure \fIsockaddr_iso\fR contains a 2-byte TSEL.
+This is for compatibility with Internet addressing.
+ARGO supports
+TSELs of length 1-64 bytes.
+TSELs of any length other than 2
+are called \*(lqextended TSELs\*(rq.
+They are described in detail in the section \fB\*(lqExtended TSELs\*(rq\fR.
+If extended TSELs are not requested, 2-byte TSELs are used by default.
+.pp
+Refer to Chapter Five for more information about ISO NSAP-addresses.
+.pp
+The \fItype\fR parameter in the \fIsocket()\fR call
+distinguishes
+datagram protocols, stream protocols, sequenced
+packet protocols, reliable datagram protocols, and
+"raw" protocols (in other words, the absence of a transport protocol).
+Unix provides manifest named constants for each of these types.
+TP supports the sequenced packet protocol abstraction, to which
+the manifest constant SOCK_SEQPACKET applies.
+.pp
+The \fIprotocol\fR
+parameter is an integer that identifies the protocol to be used.
+Unix provides a database of protocol names and their associated
+protocol numbers.
+Unix also provides user-level tools
+for searching the database.
+The tools take the form of library routines.
+A protocol number for TP has been chosen
+by the Internet NIC to allow TP to run in the Internet domain, and this
+has been added to the Unix network protocol database.
+The standard Internet database tools that serve TCP users
+can
+also serve users of TP
+in the Internet domain, if the TP protocol number is added to the
+proper Internet database file,
+\fC/etc/protocols\fR.
+This change must be made for TP to run in either the Internet or
+in the ISO domain.
+The ARGO package contains a set of tools and a database
+for use with TP in the ISO domain.
+This set of tools is described in the manual pages
+\fIisodir(5)\fR and
+\fIisodir(3)\fR.
+.pp
+When a socket is created, it is not given an address.
+Since a socket cannot be reached by a remote entity unless it has an address,
+the user must request that a socket be given an address by
+using the \fIbind()\fR system call:
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = bind( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+The address is expected to be in the format specified by the
+\fIaf\fR parameter to the \fIsocket()\fR
+call that yielded the socket descriptor \fIs\fR.
+If the user
+passes an address parameter with a zero-valued transport suffix,
+the transport layer
+assigns an unused 2-byte transport selector.
+This is a 4.3 Unix convention; it is not part of any ISO standard.
+.pp
+The \fIconnect()\fR system call effects an active open.
+It is used to establish a connection with an entity that is
+passively waiting for connection requests, and whose
+transport address is known.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = connect( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+The first parameter is a socket descriptor.
+The \fIaddr\fR parameter is a transport address in the format
+specified by the \fIaf\fR parameter to the \fIsocket()\fR
+call that yielded the socket descriptor \fIs\fR.
+.pp
+A passive open is accomplished with two system calls,
+\fIlisten()\fR followed by \fIaccept()\fR.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = listen( s, queuelen )
+.T&
+l l l.
+ +int+s;
+ +int+queuelen;
+.TE
+\fR
+.)b
+.pp
+The \fIqueuelen\fR argument specifies the maximum
+number of pending connection
+requests that will be queued for acceptance by this user.
+Connections are then accepted by the
+system call \fIaccept()\fR.
+There is no way to refuse connections.
+The functional equivalent of connection
+refusal is accomplished by accepting a connection and immediately
+disconnecting.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+new_s = accept( s, addr, addrlen )
+.T&
+l l l.
+ +int+new_s, s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+The \fIaccept()\fR call completes the connection
+establishment. If a connection request from a prospective peer
+is pending on the socket described by \fIs\fR, it is removed and
+a new socket is created for use with this connection.
+A socket descriptor for the new socket is returned by the
+system call.
+If no connection requests are pending, this call blocks.
+If the \fIaccept()\fR call fails, -1 is returned.
+The transport address of the entity requesting the connection
+is returned in the \fIaddr\fR parameter, and the length
+of the address is returned in the \fIaddrlen\fR parameter.
+The address associated with the new socket is inherited
+from the socket on which the \fIlisten()\fR and \fIaccept()\fR were performed.
+.pp
+It is possible for the \fIaccept()\fR call to be interrupted
+by an asynchronous event such as the arrival of expedited
+data.
+When system calls are interrupted, Unix returns the value -1
+to the caller and puts the constant
+EINTR in the global variable \fIerrno\fR.
+This can create problems with the system call \fIaccept()\fR.
+In the case of incoming expedited data, the interruption does
+not indicate a problem, but the data may have arrived before
+the caller has received the new socket descriptor, which is the
+socket descriptor on which the expedited data are to be received.
+In order to prevent this problem from occurring, the caller must
+prevent the issuance of asynchronous indications until the
+\fIaccept()\fR
+call has returned.
+Asynchronous indications are discussed below, in
+the section titled
+"Indications from the transport layer to the transport user".
+.pp
+It is possible to discover the
+address bound to a
+socket with the
+\fIgetsockname()\fR system call.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getsockname( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.pp
+If the socket has a peer, that is, it is connected,
+the system call
+\fIgetpeername()\fR
+is used to discover the peer's address.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getpeername( s, addr, addrlen )
+.T&
+l l l.
+ +int+s;
+ +struct sockaddr+*addr;
+ +int+addrlen;
+.TE
+\fR
+.)b
+.lp
+The names returned by
+\fIgetsockname()\fR and \fIgetpeername()\fR
+do not contain extended TSELs.
+Extended TSELs can be retrieved with
+the \fIgetsockopt()\fR and
+\fIsetsockopt()\fR system calls, described below.
+.pp
+Unix supports several protocol-independent options
+and protocol-specific options
+associated with sockets.
+These options can be inspected and changed by using
+the \fIgetsockopt()\fR and
+\fIsetsockopt()\fR system calls.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = getsockopt( s, level, option, value, valuelen )
+.T&
+l l l.
+ +int+s, level, option;
+ +char+*value;
+ +int+*valuelen;
+.TE
+\fR
+.)b
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = setsockopt( s, level, option, value, valuelen )
+.T&
+l l l.
+ +int+s, level, option;
+ +char+*value;
+ +int+valuelen;
+.TE
+\fR
+.)b
+.pp
+The \fIlevel\fR argument may indicate
+either
+that this option applies to sockets or that it applies to
+a specific protocol.
+The constants SOL_SOCKET, SOL_TRANSPORT, and SOL_NETWORK
+are possible values for the \fIlevel\fR argument.
+The \fIoption\fR argument is an integer that identifies
+the option chosen.
+.\" LIST THE OPTIONS HERE
+The options available to TP users provide the
+user with the ability to control various TP protocol options
+including but not limited to
+TP class, TPDU size negotiated, TPDU format used,
+acknowledgment and retransmission strategies.
+For a detailed list of the options, see the manual page \fItp(4p)\fR.
+.sh 1 "Extended TSELs"
+.pp
+ARGO supports TSELs
+of length 1 byte - 64 bytes for sockets bound to addresses in the
+AF_ISO address family.
+The ARGO user program uses the
+\fIgetsockopt()\fR
+and
+\fIsetsockopt()\fR
+system calls to
+discover and assign extended TSELs.
+.pp
+To create a socket with an extended TSEL,
+the process
+.ip \(bu 5
+opens a socket with \fCsocket(AF_ISO, SOCK_SEQPACKET, ISOPROTO_TP)\fR
+.ip \(bu 5
+binds an NSAP-address to the socket with \fIbind()\fR.
+The address bound may contain a 2-byte selector (\fIiso_tsuffix\fR).
+.ip \(bu 5
+uses \fIsetsockopt()\fR with the command TPOPT_MY_TSEL,
+to assign a TSEL to the socket.
+.ip \(bu 5
+calls \fIlisten(), connect()\fR, or any other appropriate system calls
+to use the socket as desired.
+.lp
+To connect to a transport entity that is bound to a TSAP-address with
+an extended TSEL, the
+process
+.ip \(bu 5
+opens a socket with \fCsocket(AF_ISO, SOCK_SEQPACKET, ISOPROTO_TP)\fR
+.ip \(bu 5
+uses \fIsetsockopt()\fR, with the command TPOPT_PEER_TSEL,
+to assign a PEER TSEL to the socket.
+This TSEL is used by the transport entity
+for all subsequent connect requests made on this socket,
+unless the peer TSEL is changed by another call to
+\fIsetsockopt()\fR employing the command TPOPT_PEER_TSEL.
+.lp
+To discover the TSEL of the peer of a connected socket,
+the process
+.ip \(bu 5
+uses \fIgetsockopt()\fR with the command TPOPT_PEER_TSEL.
+.lp
+To discover the TSEL of a socket's own address,
+the process
+.ip \(bu 5
+uses \fIgetsockopt()\fR with the command TPOPT_MY_TSEL.
+.sh 1 "Data transfer"
+.pp
+The system calls provided by AOS for data transfer have
+semantics that are unsuitable for TP,
+and in fact they are seriously deficient for the correct
+operation of any user program that uses out-of-band or expedited
+data in any way except to cause the program to abort.
+The problem lies in the manner in which the kernel
+handles interrupted system calls.
+The send and receive primitives
+may be interrupted by signals.
+A signal is the mechanism used to indicate
+the presence of expedited data or out-of-band data.
+If the send or receive primitive is interrupted before completion,
+the user needs to know how many octets of data were sent or received.
+The existing system call interface does not provide this
+information, nor does it permit TP to provide
+this information.
+All forms of the existing interface
+(\fIsend()\fR,
+\fIrecv()\fR,
+\fIsendmsg()\fR,
+\fIrecvmsg()\fR,
+\fIsendto()\fR,
+\fIrecvfrom()\fR,
+\fIwrite()\fR,
+\fIread()\fR,
+\fIwritev()\fR,
+and \fIreadv()\fR system calls)
+return an octet count
+when the system call completes, and return an error
+indication (-1, \fIerrno\fR == EINTR) if the system call is interrupted.
+To change the semantics
+of these calls would create havoc with existing user-level software.
+Instead two new system calls are provided to support data transfer.
+(The existing interface may be used if the user does not need the additional
+service provided by the new system calls.)
+.pp
+The two new system calls are patterned after
+\fIreadv()\fR and \fIwritev()\fR,
+the scatter-gather or "vectored"
+versions of \fIread()\fR and \fIwrite()\fR.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+cc = sendv( s, iov, iovlen, flags )
+.T&
+l l l.
+ +int+s;
+ +io_vector+iov;
+ +int+iovlen;
+ +unsigned int+*flags;
+.TE
+\fR
+.)b
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+cc = recvv( s, iov, iovlen, flags )
+.T&
+l l l.
+ +int+s;
+ +io_vector+iov;
+ +int+iovlen;
+ +unsigned int+*flags;
+.TE
+\fR
+.)b
+.pp
+The \fIiov\fR argument is an \fIio_vector\fR,
+an array of pointers and lengths that
+describe the areas from (or to) which the data will be gathered (or scattered).
+The \fIiovlen\fR argument is an integer that tells how many parts are in
+the io_vector.
+The \fIflags\fR parameter serves several purposes.
+The TP specification requires that TSDUs be unlimited in size.
+System calls cannot pass unlimited amounts of data between the user
+and the kernel, so
+there cannot be a one-to-one correspondence between TSDUs and
+system calls.
+The \fIflags\fR
+parameter is used to mark the end-of-TSDU on both sending and
+receiving.
+This way one TSDU can span several system calls.
+When sending,
+the user sets
+this flag
+to indicate that this request completes a TSDU.
+When receiving,
+TP sets this flag when
+the end of a TSDU is reached.
+In the latter case, the end of the data received by the transport user
+with a given system call
+coincides with the end of the TSDU
+if
+the TP has set the end-of-TSDU bit
+in the \fIflags\fR parameter of the \fIrecvv()\fR system call.
+It is possible for the peer to send an empty TPDU with the end-of-TSDU
+flag set, in which case the transport user
+may receive zero octets with the end-of-TSDU flag set.
+See the manual pages
+\fIrecvv(2)\fR
+and
+\fIsendv(2)\fR
+for details.
+.pp
+The \fIflags\fR parameter also serves to distinguish data transfer primitives
+from expedited data transfer primitives.
+The flag bit MSG_OOB is provided for "out of band data" in the
+DoD Internet protocols. It is also used to provide the expedited data service
+of the ISO protocols.
+The transport layer will deliver one expedited datum (there will be a
+one-to-one correspondence between expedited TSDUs and XPD TPDUs)
+at a time.
+The user must receive the datum before the transport
+layer will accept more expedited data.
+Each expedited datum may contain up to 16 octets.
+.pp
+.sh 1 "Disconnection"
+.pp
+The \fIclose\fR system call will disconnect any association
+between two TP entities.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+status = close( s )
+.T&
+l l l.
+ +int+s;
+.TE
+\fR
+.)b
+.pp
+The argument \fIs\fR is a socket descriptor.
+If a Unix user process terminates, Unix will close all files and
+sockets associated with the process, which means all transport
+connections associated with the process will be disconnected.
+.sh 1 "Indications from the transport layer to the transport user"
+.pp
+While the above set of system calls allows you to establish
+a connection, transfer data, and disconnect,
+several elements of the transport service are not supported
+by these system calls alone.
+These system calls do not support
+any way to indicate
+to the transport user
+the
+presence of expedited data or
+a disconnection initiated by the peer or by one of the
+cooperating TP entities.
+.pp
+The Unix signal mechanism is used to provide these
+service elements.
+When an expedited data TSDU arrives, the TP interrupts
+the user with a SIGURG signal ("urgent condition present on socket").
+The user must have previously registered a procedure to handle
+the signal by using the \fIsigvec()\fR system call or the
+\fIsignal()\fR library routine provided for that purpose.
+The signal handler takes the form
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+int sighandler( signal_number)
+.T&
+l l l.
+ +int+signal_number;
+.TE
+\fR
+.)b
+.pp
+The \fIsignal_number\fR argument will be the well-known constant SIGURG.
+There are two reasons for
+the transport layer to issue
+a SIGURG:
+expedited data
+are present or
+disconnection was initiated by a transport entity or by the peer.
+Should the user have more than one transport connection open,
+another system call is used to determine to which socket(s)
+the urgent condition applies.
+This is the \fIselect()\fR system call, described below.
+.pp
+When the SIGURG indicates a disconnection, there may be
+user data from the peer present.
+TP
+discards all queued normal data and expedited data.
+It saves the disconnect data for the user to receive via the
+\fIgetsockopt()\fR system call.
+Unfortunately, the socket is already considered closed
+by the kernel, so there is no way
+for the user to read the incoming disconnect data, so receipt
+of disconnect data is not supported.
+.\"
+.\"If the user does not receive the disconnect data before the
+.\"reference timer expires, the data will be discarded and the
+.\"socket will be closed.
+.pp
+Transport service users may use more than one transport
+connection at a time.
+The \fIselect()\fR system call facilitates this.
+.(b
+\fC
+.TS
+tab(+);
+l s s s.
+#include <sys/types.h>
++
+nfound = select( num_to_scan, recvmask, sendmask,
++exceptmask, timeout )
+.T&
+l l l.
+ +int+nfound, num_to_scan;
+ +fd_set+*recvmask, *sendmask, *exceptmask;
+ +time+timeout;
+.TE
+\fR
+.)b
+.pp
+This system call takes as parameters a set of masks
+that specify a subset of the socket descriptors that are in
+use by the user program.
+\fISelect()\fR inspects the sockets to see if they have data
+to be received, can service a send without blocking, or
+have an exceptional condition pending, respectively.
+The masks will be set upon return to indicate the socket descriptors
+for which the respective conditions exist.
+The \fInum_to_scan\fR argument limits the number of sockets that are
+inspected.
+The call will return within the amount of time given in the
+\fItimeout\fR parameter, or, if the parameter is zero, \fIselect()\fR
+will block indefinitely.
+.\" FIGURE
+.so figs/TS_primitives.nr
+.pp
+.CF
+summarizes the mapping of the transport service primitives
+to Unix facilities.
diff --git a/share/doc/iso/wiscman/arp.4p b/share/doc/iso/wiscman/arp.4p
new file mode 100644
index 0000000..f0fe2a0
--- /dev/null
+++ b/share/doc/iso/wiscman/arp.4p
@@ -0,0 +1,118 @@
+.\" Copyright (c) 1983 Regents of the University of California.
+.\" All rights reserved. The Berkeley software License Agreement
+.\" specifies the terms and conditions for redistribution.
+.\"
+.\" @(#)arp.4p 6.2 (Berkeley) 5/15/86
+.\"
+.TH ARP 4P "May 15, 1986"
+.UC 5
+.SH NAME
+arp \- Address Resolution Protocol
+.SH SYNOPSIS
+.B "pseudo-device ether"
+.SH DESCRIPTION
+ARP is a protocol used to dynamically map between DARPA Internet
+and 10Mb/s Ethernet addresses. It is
+used by all the 10Mb/s Ethernet interface drivers.
+It is not specific to Internet protocols or to 10Mb/s Ethernet,
+but this implementation currently supports only that combination.
+.PP
+ARP caches Internet-Ethernet address mappings. When an interface
+requests a mapping for an address not in the cache, ARP queues the
+message which requires the mapping and broadcasts
+a message on the associated network requesting the address mapping.
+If a response is provided, the new mapping is cached and any pending
+message is transmitted.
+ARP will queue
+at most one packet while waiting for a mapping request to be responded to;
+only the most recently ``transmitted'' packet is kept.
+.PP
+To facilitate communications with systems which do not use ARP,
+.IR ioctl \^s
+are provided to enter and delete entries in the Internet-to-Ethernet tables.
+Usage:
+.LP
+.nf
+.ft B
+ #include <sys/ioctl.h>
+ #include <sys/socket.h>
+ #include <net/if.h>
+ struct arpreq arpreq;
+
+ ioctl(s, SIOCSARP, (caddr_t)&arpreq);
+ ioctl(s, SIOCGARP, (caddr_t)&arpreq);
+ ioctl(s, SIOCDARP, (caddr_t)&arpreq);
+.fi
+.ft R
+Each ioctl takes the same structure as an argument.
+SIOCSARP sets an ARP entry, SIOCGARP gets an ARP entry, and SIOCDARP
+deletes an ARP entry. These ioctls may be applied to any socket descriptor
+.I s,
+but only by the super-user.
+The
+.I arpreq
+structure contains:
+.LP
+.RS
+.ta \w'#define\ \ 'u +\w'ATF_USETRAILERS\ \ 'u +\w'0x08\ \ \ \ 'u
+.nf
+/*
+ * ARP ioctl request
+ */
+struct arpreq {
+ struct sockaddr arp_pa; /* protocol address */
+ struct sockaddr arp_ha; /* hardware address */
+ int arp_flags; /* flags */
+};
+/* arp_flags field values */
+#define ATF_COM 0x02 /* completed entry (arp_ha valid) */
+#define ATF_PERM 0x04 /* permanent entry */
+#define ATF_PUBL 0x08 /* publish (respond for other host) */
+#define ATF_USETRAILERS 0x10 /* send trailer packets to host */
+.fi
+.RE
+.LP
+The address family for the
+.I arp_pa
+sockaddr must be AF_INET; for the
+.I arp_ha
+sockaddr it must be AF_UNSPEC.
+The only flag bits which may be written are ATF_PERM, ATF_PUBL
+and ATF_USETRAILERS.
+ATF_PERM causes the entry to be permanent if the ioctl call succeeds.
+The peculiar nature of the ARP tables may cause the ioctl to fail if more
+than 8 (permanent) Internet host addresses hash to the same slot.
+ATF_PUBL specifies that the ARP code should respond to ARP requests for the
+indicated host coming from other machines. This allows a host to act as an
+``ARP server,'' which may be useful in convincing an ARP-only machine to talk
+to a non-ARP machine.
+.PP
+ARP is also used to negotiate the use of trailer IP encapsulations;
+trailers are an alternate encapsulation used to allow efficient packet
+alignment for large packets despite variable-sized headers.
+Hosts which wish to receive trailer encapsulations so indicate
+by sending gratuitous ARP translation replies along with replies
+to IP requests; they are also sent in reply to IP translation replies.
+The negotiation is thus fully symmetrical, in that either or both hosts
+may request trailers.
+The ATF_USETRAILERS flag is used to record the receipt of such a reply,
+and enables the transmission of trailer packets to that host.
+.PP
+ARP watches passively for hosts impersonating the local host (i.e. a host
+which responds to an ARP mapping request for the local host's address).
+.SH DIAGNOSTICS
+.B "duplicate IP address!! sent from ethernet address: %x:%x:%x:%x:%x:%x."
+ARP has discovered another host on the local network which responds to
+mapping requests for its own Internet address.
+.SH SEE ALSO
+ec(4), de(4), il(4), inet(4F), arp(8C), ifconfig(8C)
+.br
+``An Ethernet Address Resolution Protocol,'' RFC826, Dave Plummer,
+Network Information Center, SRI.
+.br
+``Trailer Encapsulations,'' RFC893, S.J. Leffler and M.J. Karels,
+Network Information Center, SRI.
+.SH BUGS
+ARP packets on the Ethernet use only 42 bytes of data; however, the smallest
+legal Ethernet packet is 60 bytes (not including CRC).
+Some systems may not enforce the minimum packet size, others will.
diff --git a/share/doc/iso/wiscman/clnp.4p b/share/doc/iso/wiscman/clnp.4p
new file mode 100644
index 0000000..07efd1a
--- /dev/null
+++ b/share/doc/iso/wiscman/clnp.4p
@@ -0,0 +1,91 @@
+.TH CLNP 4P "9 December 1988"
+.ds ]W Wisconsin ARGO 1.0
+.UC 4
+.SH NAME
+clnp \- Connectionless-Mode Network Protocol
+.SH SYNOPSIS
+.B #include <sys/socket.h>
+.br
+.B #include <netargo/iso.h>
+.br
+.B #include <netargo/clnp.h>
+.PP
+.B s = socket(AF_ISO, SOCK_RAW, 0);
+.SH DESCRIPTION
+CLNP is the connectionless-mode network protocol used by the
+connectionless-mode network service. This protocol is specified in
+ISO 8473.
+It may be accessed
+through a \*(lqraw socket\*(rq for debugging purposes only.
+CLNP sockets are connectionless,
+and are normally used with the
+.I sendto
+and
+.I recvfrom
+calls, though the
+.IR connect (2)
+call may also be used to fix the destination for future
+packets (in which case the
+.IR read (2)
+or
+.IR recv (2)
+and
+.IR write (2)
+or
+.IR send (2)
+system calls may be used).
+.PP
+Outgoing packets automatically have a CLNP header prepended to
+them. Incoming packets received by the user contain the full CLNP header.
+The following \fIsetsockopt\fR options apply to CLNP:
+.TP
+CLNPOPT_FLAGS
+Sets the flags which are passed to clnp when sending a datagram.
+Valid flags are:
+.nf
+.br
+CLNP_NO_SEG-Do not allow segmentation
+CLNP_NO_ER-Suppress ER pdus
+CLNP_NO_CKSUM-Do not generate the CLNP checksum
+.br
+.fi
+.TP
+CLNPOPT_OPTS
+Sets CLNP options. The options must be formatted exactly as specified by
+ISO 8473, section 7.5 "Options Part." Once an option has been set, it will
+be sent on all packets until a different option is set.
+.SH DIAGNOSTICS
+A socket operation may fail with one of the following errors returned:
+.TP 15
+[EISCONN]
+when trying to establish a connection on a socket which
+already has one, or when trying to send a datagram with the destination
+address specified and the socket is already connected;
+.TP 15
+[ENOTCONN]
+when trying to send a datagram, but
+no destination address is specified, and the socket hasn't been
+connected;
+.TP 15
+[ENOBUFS]
+when the system runs out of memory for
+an internal data structure;
+.TP 15
+[EADDRNOTAVAIL]
+when an attempt is made to create a
+socket with a network address for which no network interface
+exists;
+.TP 15
+[EHOSTUNREACH]
+when trying to send a datagram, but no route to the destination
+address exists.
+.TP 15
+[EINVAL]
+when specifying unsupported options.
+.SH SEE ALSO
+send(2), recv(2), intro(4N), iso(4F)
+.SH BUGS
+Packets are sent with the type code of 0x1d (technically an invalid
+packet type) for lack of a better way to identify raw CLNP packets.
+.PP
+No more than MLEN bytes of options can be specified.
diff --git a/share/doc/iso/wiscman/cons.4 b/share/doc/iso/wiscman/cons.4
new file mode 100644
index 0000000..73bd3fc
--- /dev/null
+++ b/share/doc/iso/wiscman/cons.4
@@ -0,0 +1,241 @@
+.\"
+.\" 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1986,1987,1988
+.\" LICENSED MATERIALS - PROPERTY OF IBM
+.\" REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
+.\"
+.\"$Header:cons.4_ca 11.3$
+.\"$ACIS:cons.4_ca 11.3$
+.\"$Source: /ibm/acis/usr/man/man4/RCS/cons.4_ca,v $
+.\" This file uses -man macros.
+.TH CONS 4 "Sept 1988" "Space overwritten by .AC macro" " "
+.AC 1 0
+.SH NAME
+cons \- keyboard and console display interface
+.SH DESCRIPTION
+The keyboard and various possible displays combine to
+provide a terminal-like
+interface to the system. Internally, these are separate devices which
+software combines to emulate a normal terminal. See the appropriate manual
+pages for information about each display and the keyboard.
+.PP
+The keyboard adapter also supports the speaker, which is activated
+when the ASCII character \fBbel\fP (\fB^G\fP)
+is sent to the display with software.
+For additional information on speaker control, see \fIspeaker\fP(4).
+.PP
+.B Console Device Control
+.PP
+The display devices,
+\fI/dev/ttyaed\fR, \fI/dev/ttyap16\fR, \fI/dev/ttyap8c\fR,
+\fI/dev/ttyapa8\fR, \fI/dev/ttyega\fR, \fI/dev/ttymono\fR,
+\fI/dev/ttympel\fR, \fI/dev/ttyvga\fR, and \fI/dev/tty8514\fR are all
+minor devices under
+\fI/dev/console\fR, and are all capable of displaying console output.
+Unique to this system is the fact that you may have one or more of these
+displays on your workstation at a time and any one can act as a console.
+With only one keyboard and system mouse, the console driver
+multiplexes these input devices to the many displays.
+All of the displays may have simultaneous logins and the user
+can ``hot key'' between each display.
+At first, this
+``input focus''
+is on
+the first device in the above sequence to
+be found at initialization time. The input focus
+can be manually switched to the next available display by pressing the
+default ``hot key'' <Alt><Scroll Lock>.
+When the
+input focus
+is on a display, all keyboard and mouse data are sent to the process(es)
+that read from that display.
+.PP
+If no other console tty device is open, and only the default input
+emulator is used (see \fIkbdemul\fP(4)), the input focus is set to
+\fI/dev/console\fP. In this case, <Alt><Scroll Lock> only switches
+which display gets console output.
+In the case where one or more tty devices are open, or the default input
+emulator changes, \fI/dev/console\fP gets no input. It tries to send output
+to the currently focused device. A user can redirect these console messages
+to any tty devices with the TIOCCONS ioctl.
+.PP
+To support the many displays and the multiplexing between them, an
+emulator package was developed to work with the console driver.
+This package allows different types of emulation on input and output to
+be written independently of device.
+.PP
+The display devices \fI/dev/aed\fP, \fI/dev/apa16\fP, \fI/dev/apa8c\fP,
+\fI/dev/apa8\fP, \fI/dev/ega\fR, \fI/dev/mono\fP,
+\fI/dev/mpel\fR, \fI/dev/vga\fR, and \fI/dev/ibm8514\fR
+are also minor devices to
+\fI/dev/console\fP. They are typically used by window managers and other
+graphic applications. When the focus is pointed to one of these display
+devices, the console messages are put in a circular buffer
+(see \fIbufemul\fP(4))
+unless redirected with the TIOCCONS ioctl.
+The buffer is flushed to the screen upon closing the display device.
+.PP
+The following are generic console \fIioctls\fP defined in \fIscreen_cousf.u\fP:
+.TP 20
+CON_SELECT_SCREEN
+Output focus is set to display number (arg > 0) or
+to next display in list (arg < 0). Previous display number is returned.
+.TP 20
+CON_GET_SCREEN
+Just returns the current output focus display number.
+.TP 20
+EIGETD
+Gets the number of the current input emulator for this display.
+.TP 20
+EOGETD
+Gets the number of the current output emulator for this display.
+.TP 20
+EISETD
+Sets the input emulator and returns the previous for this display.
+.TP 20
+EOSETD
+Sets the output emulator and returns the previous for this display.
+.TP 20
+CON_INIT_SCREEN
+Initializes the specified display (arg >=0) or this display
+(arg < 0).
+.TP 20
+CON_GET_FOCUS_CODE
+Gets the current keyboard code for setting the console
+focus (affects xemul only).
+.TP 20
+CON_SET_FOCUS_CODE
+Sets the current keyboard code for setting the console
+focus (affects xemul only), and returns the previous code.
+.PP
+All of the above commands take integer arguments.
+.PP
+The following are generic console \fIioctls\fP defined in \fIconsio.h\fP:
+.TP 20
+SCRIOCGETF
+Gets the screen control flags for the given display number.
+.TP 20
+SCRIOCSETC
+Sets the screen control flags for the given display number.
+.PP
+.in +10
+SCRIOCGETF and SCRIOCSETC use the following structure:
+.DT
+.nf
+struct screen_control {
+ int device; /* which screen/display to control */
+ int switches; /* Flags for this screen */
+};
+.fi
+.sp 2
+.RS 6
+.TP 20
+CONSDEV_PRESENT
+Display is present on this system.
+This bit cannot be changed by SCRIOCSETC.
+.TP 20
+CONSDEV_KERNEL
+Display is available to the kernel.
+.TP 20
+CONSDEV_USER
+Display is available to the user.
+.TP 20
+CONSDEV_INIT
+Display has been initialized for output.
+.TP 20
+CONSDEV_TTY
+Display has been initialized for output.
+This bit cannot be changed by SCRIOCSETC.
+.TP 20
+CONSDEV_GRA
+Graphics display has been opened directly by minor device number.
+This bit cannot be changed by SCRIOCSETC.
+.TP 20
+CONSDEV_NOINPUT
+Prevents the "round-robin" console focus-switching from finding this
+display. This flag is cleared when \fIbuf_emul\fP is closed.
+.TP 20
+SCRSETNIP
+Sets the no-input bit in the screen control flags for the
+display's current file description.
+.TP 20
+SCRCLRNIP
+Clears the no-input bit in the screen control flags for the
+display's current file description.
+.RE
+.PP
+The following \fIioctl\fP is defined in \fIbufemul.h\fP:
+.TP 20
+BUFDISPINFO
+\fIArg\fP returns the following information about the display:
+.PP
+.RS 6
+.TP 20
+BUF_IS_ATR(arg)
+True when the CPU is an IBM 6152 Academic System.
+.TP 20
+BUF_IS_RTPC(arg)
+True when the CPU is an IBM RT PC.
+.TP 20
+BUF_GET_PCCODE(x)
+Get PC-Code version/type byte (IBM 6152 only).
+.bp
+.TP 20
+BUF_GET_VGA(arg)
+Get the type of display connected to the VGA.
+0=none, 1=color, 2=gray. Valid only for the IBM 6152 Academic System.
+.TP 20
+BUF_GET_8514(arg)
+Get the type of display connected to the IBM 8514/A.
+0=none, 1=color, 2=gray. Valid only for the IBM 6152 Academic System.
+.TP 20
+BUF_GET_EGA(arg)
+Returns the value of the switches on the EGA display. Valid only for the IBM RT PC with an EGA card installed.
+.RE
+.PP
+All of the above \fIioctl\fP system calls are device-independent controls
+for dealing with the emulators.
+.PP
+Each emulator has its own set of \fIioctls\fP for its own emulation purposes.
+These other \fIioctls\fP are used in window-manager emulators for operations
+such as passing/positioning the mouse locator for/on the display.
+See the man page for any particular emulator for more information.
+.PP
+.SH NOTE
+On the IBM RT PC, the kernel flashes ``98'' on the LEDs if it cannot find any
+configured display during initialization, and then proceeds.
+.SH DIAGNOSTICS
+None.
+.SH FILES
+.PP
+For the IBM RT PC:
+.br
+/dev/console
+.br
+/dev/aed
+.br
+/dev/apa16
+.br
+/dev/apa8c
+.br
+/dev/apa8
+.br
+/dev/ega
+.br
+/dev/mono
+.br
+/dev/mpel
+.br
+.PP
+For the IBM 6152 Academic System:
+.br
+/dev/vga
+.br
+/dev/ibm8514
+.SH "SEE ALSO"
+bufemul(4), bus(4), ibm5081(4), ibm5151(4), ibm6153(4), ibm6154(4),
+ibm6155(4), ibm8514(4), ibmaed(4), ibmemul(4), kbdemul(4),
+speaker(4), stdemul(4), tty(4), vga(4), xemul(4), setscreen(8)
+.br
+``IBM/4.3 Console Emulators'', in Volume II, Supplementary Documents
+
+
diff --git a/share/doc/iso/wiscman/cons.4p b/share/doc/iso/wiscman/cons.4p
new file mode 100644
index 0000000..c8b152b
--- /dev/null
+++ b/share/doc/iso/wiscman/cons.4p
@@ -0,0 +1,196 @@
+.TH CONS 4P "9 December 1988"
+.ds ]W Wisconsin ARGO 1.0
+.UC 4
+.SH NAME
+CONS \- Connection Oriented Network Service
+.SH SYNOPSIS
+For use as a network service (CONS):
+.nf
+.sp
+\fB#include <sys/socket.h>\fR
+\fB#include <sys/mbuf.h>\fR
+\fB#include <netargo/iso.h>\fR
+\fB#include <netargo/cons.h>\fR
+\fB#include <netargo/iso_errno.h>\fR
+.sp
+\fBint cons_output(isop, m, len, isdatagram)\fR
+.sp
+or for use as a subnetwork service (COSNS):
+.sp
+\fB#include <sys/socket.h>\fR
+\fB#include <sys/mbuf.h>\fR
+\fB#include <netargo/iso.h>\fR
+\fB#include <net/if.h>\fR
+\fB#include <netargo/cons.h>\fR
+\fB#include <netargo/iso_errno.h>\fR
+.sp
+\fBint cosns_output(ifp, m, dst)\fR
+.fi
+.SH DESCRIPTION
+.PP
+The Connection Oriented Network Service (CONS) implemented for the AOS R2
+at the University of Wisconsin - Madison
+supports transport protocols, acting as a network service,
+and it also supports other network protocols, acting as a subnetwork
+service or link-layer service.
+Several software modules are combined to provide these services.
+.TP 10
+X.25
+The CCITT X.25 packet layer and link layer protocols run on
+a coprocessor (the EICON Network Adapter), which serves as a DTE.
+.TP 10
+Ecn driver
+A device driver manages the interaction between
+the coprocessor and the PC/RT.
+.TP 10
+CONS "glue"
+A software module implements portions of the OSI CONS (ISO 8878),
+which describes a way to use the X.25 protocols to support the
+OSI connection-oriented network service.
+.PP
+The OSI CONS contains several "service elements"
+that ARGO does not use or support.
+Expedited data,
+quality of service maintenance,
+call collision resolution,
+permanent virtual circuits,
+user data on connect and release,
+user-level acknowledgement
+("receipt confirmation" in CCITT/ISO argot), and reset/resynchronize
+are not supported.
+Several of the service primitives for connection establishment
+and release are not supported, and
+numerous parameters to other primitives specified in the OSI CONS
+are not supported.
+The CONS glue does provide all the support necessary to run
+ISO transport classes 0 and 4 over X.25, and ISO CLNP (also called
+ISO IP) over X.25.
+The subnetwork dependent convergence functions implemented in the glue
+permit interoperability with
+OSINET and EAN at this writing.
+Interoperability with other networks will be established in the future.
+.PP
+The coprocessor that implements the X.25 link and packet layers
+is the Eicon Technologies Access/X.25 Stand-Alone Network Adapter
+(see \fIecn(4)\fR).
+.PP
+The glue module provides two interfaces to higher layers:
+a "subnetwork service" (COSNS) used by network layer protocols, which
+has a typical BSD kernel device driver interface
+and
+a "network service" (CONS) used by transport protocols, which has
+a procedure call interface similar to that of IP and CLNP.
+.PP
+The network service is reliable and sequenced but does not
+provide a graceful close service; it provides only an abort service.
+.PP
+The subnetwork service is neither reliable nor sequenced.
+The subnetwork service implemented by the glue hides the
+connection-oriented aspects of the protocols; nevertheless,
+we call it the "connection-oriented subnetwork service" (COSNS)
+here, for lack of a better name.
+.SS "LIBRARIES
+No libraries are needed to use the CONS, however,
+the numerous error values returned by X.25 cannot be accommodated
+by the standard \fIperror()\fR in the C library.
+The ISO library
+.nf
+.sp
+.in +5
+\fC/usr/argo/lib/libisodir.a\fR
+.in -5
+.sp
+.fi
+provides an expanded perror() to handle the additional error return codes.
+.SS "ERROR VALUES
+.PP
+The error codes returned by the CONS are taken from
+the diagnostic code of the X.25 level 3 packets, as
+described in figure 14-B of ISO 8208 (the ISO standard which
+is equivalent to CCITT X.25).
+The actual error value returned in
+\fIerrno\fR
+is the X.25 diagnostic code in the lower 10 bits
+logically "or"ed with the hexadecimal value 0x8400 (bits
+10 and 15 set, counting from zero at the least significant bit).
+The error values can be found in
+the file
+.nf
+.in +5
+.sp
+\fC<netargo/iso_errno.h>\fR
+.sp
+.in -5
+.fi
+.SS "PROTOCOL IDENTIFICATION
+.PP
+The purpose of this section is to describe how incoming packets
+are forwarded from the glue to the various higher
+layers (ISO transport, CLNP), how
+routes are chosen from the higher layers to the glue, and
+how NSAP-addresses are related to all this.
+.SS Outgoing path:
+The ARGO transport entity routes packets either to
+the CONS glue, to the CLNP module, or to the DARPA Internet IP
+module, based on the value of the network service parameter
+given to the transport layer by the user.
+The \fInetserv\fR property of records in the ARGO
+directory service database
+can be used to determine the network service to be used by the
+transport layer. See also \fIisodir(5)\fR and \fIisodir(3)\fR.
+.PP
+The connectionless network layer entity routes packets to the
+COSNS based on the routing table entries in the connectionless network layer.
+This means that any type of NSAP-address supported by the kernel
+may be used with a CLNP packet
+that is routed over X.25.
+.PP
+When the glue creates an X.25 Call Request packet, it
+places an X.121 address (DTE address)
+in both the Calling and Called DTE address fields.
+The X.121 addresses are extracted from the \fISNPA cache\fR,
+a table that maps NSAP-addresses to SNPA-addresses, and
+is maintained by the ES-IS protocol module of the OSI network layer.
+In addition to placing a DTE address in the X.25 packet,
+the "glue" may
+use the 1984 Called Address Extension facility to convey the
+NSAP-addresses.
+Whether or not this is done depends on the compile-time option -DX25_1984.
+.SS Incoming path:
+The X.25 Call Request User Data field and the
+1984 X.25 Address Extension Facility are used
+to determine the incoming path through the network layer.
+The NSAP addresses passed up along with the packet are taken from the
+Address Extension facility, if present.
+If the facility is absent, the glue creates two type-37 NSAP-addresses,
+filling in the X.121 address from the called and
+calling DTE-addresses on the Call Request packet, if present.
+The glue then asks the ES-IS module to add an entry to the
+SNPA cache to associate the calling DTE address with its
+derived NSAP-address.
+These cache entries have a holding time of 5 minutes, and get
+refreshed as long as there is activity on the virtual circuit
+resulting from the call request.
+.PP
+If a Call Request packet contains a protocol identifier
+as described in ISO PTDR 9577, this protocol identifier is used
+to route the packet to the higher layers.
+If there is no protocol identifier, the higher layer is assumed to be ISO
+transport.
+.SH "BUGS
+.PP
+If an incoming X.25 Call Request contains no DTE-addresses and
+no NSAP-addresses (in the Address Extension facility)
+the kernel panics.
+.SH "SEE ALSO
+.PP
+isodir(3),
+ecn(4),
+clnp(4),
+tp(4),
+isodir(5),
+isoroute(8),
+ifconfig(8),
+netstat(1),
+xstat(8),
+"ARGO 1.0 Kernel Programmer's Manual"
diff --git a/share/doc/iso/wiscman/if.4n b/share/doc/iso/wiscman/if.4n
new file mode 100644
index 0000000..ca6884c
--- /dev/null
+++ b/share/doc/iso/wiscman/if.4n
@@ -0,0 +1,136 @@
+#
+# 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1988
+# LICENSED MATERIALS - PROPERTY OF IBM
+# REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
+#
+.\"# $Header:if.4n_ca 1.5$
+.\"# $ACIS:if.4n_ca 1.5$
+.\"# $Source: /ibm/acis/usr/man/man4/RCS/if.4n_ca,v $
+.\" @(#)if.4n 1.2 87/08/23 3.2/4.3NFSSRC
+.\" @(#)if.4n 1.2 87/02/10 NFSSRC
+.\" @(#)if.4n 1.1 86/09/25 SMI;
+.TH IF 4N "Sept 1988" "Space overwritten" " "
+.AC 1 0
+.SH NAME
+if \- general properties of network interfaces (NFS only)
+.SH DESCRIPTION
+.IX "if device" "" "\fLif\fP \(em network interface general properties" "" PAGE START
+Each network interface in a system corresponds to a
+path through which messages may be sent and received. A network
+interface usually has a hardware device associated with it, but
+certain interfaces, such as the loopback interface
+.IR lo (4),
+do not.
+.LP
+At boot time each interface which has underlying hardware support
+makes itself known to the system during the autoconfiguration
+process. Once the interface has acquired its address it is
+expected to install a routing table entry so that messages may
+be routed through it. Most interfaces require some part of
+their address specified with an SIOCSIFADDR ioctl before they
+will allow traffic to flow through them. On interfaces where
+the network-link layer address mapping is static, only the
+network number is taken from the ioctl; the remainder is found
+in a hardware specific manner. On interfaces which provide
+dynamic network-link layer address mapping facilities (for example,
+10Mb/s Ethernets using
+.IR arp (4P)),
+the entire address specified in the ioctl is used.
+.LP
+The following
+.I ioctl
+calls may be used to manipulate network interfaces. Unless
+specified otherwise, the request takes an
+.I ifreq
+structure as its parameter. This structure has the form
+.RS
+.nf
+struct ifreq {
+ char ifr_name[16]; /* name of interface (e.g. "ec0") */
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ short ifru_flags;
+ } ifr_ifru;
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+};
+.fi
+.RE
+.IP SIOCSIFADDR 5
+.IX "ioctls for sockets" "SIOCSIFADDR" "\fLioctl\fP's for sockets" "\fLSIOCSIFADDR\fP \(em set ifnet address"
+.IX "SIOCSIFADDR set ifnet address" "" "\fLSIOCSIFADDR\fP \(em set ifnet address"
+.IX set "ifnet address ioctl \(em \fLSIOCSIFADDR\fP"
+.IX "network interface ioctls" SIOCSIFADDR "network interface \fLioctl\fP's" "\fLSIOCSIFADDR\fP \(em set ifnet address"
+Set interface address. Following the address
+assignment, the ``initialization'' routine for
+the interface is called.
+.IP SIOCGIFADDR
+.IX "ioctls for sockets" "SIOCGIFADDR" "\fLioctl\fP's for sockets" "\fLSIOCGIFADDR\fP \(em get ifnet address"
+.IX "SIOCGIFADDR get ifnet address" "" "\fLSIOCGIFADDR\fP \(em get ifnet address"
+.IX get "ifnet address \fLioctl\fP \(em \fLSIOCGIFADDR\fP"
+.IX "network interface ioctls" SIOCGIFADDR "network interface \fLioctl\fP's" "\fLSIOCGIFADDR\fP \(em get ifnet address"
+Get interface address.
+.IP SIOCSIFDSTADDR
+.IX "ioctls for sockets" "SIOCSIFDSTADDR" "\fLioctl\fP's for sockets" "\fLSIOCSIFDSTADDR\fP \(em set p-p address"
+.IX "SIOCSIFDSTADDR set p-p address" "" "\fLSIOCSIFDSTADDR\fP \(em set p-p address"
+.IX set "p-p address ioctl \(em \fLSIOCSIFDSTADDR\fP"
+.IX "network interface ioctls" SIOCSIFDSTADDR "network interface \fLioctl\fP's" "\fLSIOCSIFDSTADDR\fP \(em set p-p address"
+Set point to point address for interface.
+.IP SIOCGIFDSTADDR
+.IX "ioctls for sockets" "SIOCGIFDSTADDR" "\fLioctl\fP's for sockets" "\fLSIOCGIFDSTADDR\fP \(em get p-p address"
+.IX "SIOCGIFDSTADDR get p-p address" "" "\fLSIOCGIFDSTADDR\fP \(em get p-p address"
+.IX get "p-p address \fLioctl\fP \(em \fLSIOCGIFDSTADDR\fP"
+.IX "network interface ioctls" SIOCGIFDSTADDR "network interface \fLioctl\fP's" "\fLSIOCGIFDSTADDR\fP \(em get p-p address"
+Get point to point address for interface.
+.IP SIOCSIFFLAGS
+.IX "ioctls for sockets" "SIOCSIFFLAGS" "\fLioctl\fP's for sockets" "\fLSIOCSIFFLAGS\fP \(em set ifnet flags"
+.IX "SIOCSIFFLAGS set ifnet flags" "" "\fLSIOCSIFFLAGS\fP \(em set ifnet flags"
+.IX set "ifnet flags ioctl \(em \fLSIOCSIFFLAGS\fP"
+.IX "network interface ioctls" SIOCSIFFLAGS "network interface \fLioctl\fP's" "\fLSIOCSIFFLAGS\fP \(em set ifnet flags"
+Set interface flags field. If the interface is marked down,
+any processes currently routing packets through the interface
+are notified.
+.IP SIOCGIFFLAGS
+.IX "ioctls for sockets" "SIOCGIFFLAGS" "\fLioctl\fP's for sockets" "\fLSIOCGIFFLAGS\fP \(em get ifnet flags"
+.IX "SIOCGIFFLAGS get ifnet flags" "" "\fLSIOCGIFFLAGS\fP \(em get ifnet flags"
+.IX get "ifnet flags \fLioctl\fP \(em \fLSIOCGIFFLAGS\fP"
+.IX "network interface ioctls" SIOCGIFFLAGS "network interface \fLioctl\fP's" "\fLSIOCGIFFLAGS\fP \(em get ifnet flags"
+Get interface flags.
+.IP SIOCGIFCONF
+.IX "ioctls for sockets" "SIOCGIFCONF" "\fLioctl\fP's for sockets" "\fLSIOCGIFCONF\fP \(em get ifnet list"
+.IX "SIOCGIFCONF get ifnet list" "" "\fLSIOCGIFCONF\fP \(em get ifnet list"
+.IX get "ifnet list \fLioctl\fP \(em \fLSIOCGIFCONF\fP"
+.IX "network interface ioctls" SIOCGIFCONF "network interface \fLioctl\fP's" "\fLSIOCGIFCONF\fP \(em get ifnet list"
+Get interface configuration list. This request takes an
+.I ifconf
+structure (see below) as a value-result parameter. The
+.I ifc_len
+field should be initially set to the size of the buffer
+pointed to by
+.IR ifc_buf .
+On return it will contain the length, in bytes, of the
+configuration list.
+.RS
+.nf
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+struct ifconf {
+ int ifc_len; /* size of associated buffer */
+ union {
+ caddr_t ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */
+};
+.RE
+.fi
+.IX "if device" "" "\fLif\fP \(em network interface general properties" "" PAGE END
+.SH "SEE ALSO
+arp(4P), ec(4S), lo(4)
diff --git a/share/doc/iso/wiscman/iso.4f b/share/doc/iso/wiscman/iso.4f
new file mode 100644
index 0000000..5e7ca88
--- /dev/null
+++ b/share/doc/iso/wiscman/iso.4f
@@ -0,0 +1,87 @@
+.TH ISO 4F "9 December 1988"
+.ds ]W Wisconsin ARGO 1.0
+.UC 4
+.SH NAME
+iso \- ISO protocol family
+.SH SYNOPSIS
+.B #include <sys/types.h>
+.br
+.B #include <netargo/iso.h>
+.SH DESCRIPTION
+The ISO protocol family is a collection of protocols
+that uses the ISO address format.
+The ISO family provides protocol support for the
+SOCK_SEQPACKET abstraction through the TP protocol (ISO 8073),
+and for the SOCK_RAW abstraction
+by providing direct access (for debugging) to the
+CLNP (ISO 8473) network layer protocol.
+.SH ADDRESSING
+ISO addresses are based upon ISO 8348/AD2,
+"Addendum to the Network Service Definition Covering Network Layer Addressing."
+.PP
+Sockets bound to the OSI protocol family use
+the following address structure:
+.sp 1
+.nf
+._f
+struct sockaddr_iso {
+ short siso_family;
+ u_short siso_tsuffix;
+ struct iso_addr siso_addr;
+};
+.sp 1
+.fi
+.PP
+The fields of this structure are:
+.TP 10
+\fIsiso_family:\fR
+Identifies the domain: AF_ISO or AF_INET.
+.TP 10
+\fIsiso_tsuffix:\fR
+The transport part of the address, described below.
+.TP 10
+\fIsiso_addr:\fR
+The network part of the address, described below.
+.SS TRANSPORT ADDRESSING
+.PP
+The above structure describes a simple form of
+ISO \fItransport\fR addresses.
+An ISO transport address is similar to an Internet address in that
+it contains a network-address portion and a portion that the
+transport layer uses to multiplex its services among clients.
+In the Internet domain, this portion of the address is called a \fIport\fR.
+In the ISO domain, this is called a \fItransport selector\fR
+(also known at one time as a \fItransport suffix\fR).
+While ports are always 16 bits,
+transport selectors may be
+of (almost) arbitrary size.
+ARGO supports two forms of transport selectors:
+"normal" or 16-bit selectors, and
+"extended" selectors, or selectors that may be from 1-64 bytes
+in length.
+The default mode of operation is to use 16-bit transport selectors.
+These addresses can be represented with the above structure.
+When transport selectors of any other size are used, the transport
+selector is kept in a separate structure.
+See the manual page \fItp(4p)\fR.
+.SS NETWORK ADDRESSING
+.PP
+ISO network addresses are limited to 20 bytes in length.
+ISO network addresses can take any format.
+ARGO 1.0 supports three formats.
+See \fIisodir(3)\fR and \fIisodir(5)\fR.
+.SH PROTOCOLS
+The ARGO 1.0 implementation of the
+ISO protocol family comprises
+the Connectionless-Mode Network Protocol (CLNP),
+and the Transport Protocol (TP), classes 4 and 0,
+and X.25.
+TP is used to support the SOCK_SEQPACKET
+abstraction.
+A raw interface to CLNP is available
+by creating an ISO socket of type SOCK_RAW.
+This is used for CLNP debugging only.
+.SH SEE ALSO
+tp(4P), cons(4p), clnp(4P), isodir(3), iso(4f), isodir(5),
+"The ARGO 1.0 Kernel Programmer's Guide",
+"Installing ARGO 1.0 on Academic Operating Systems 4.3 Release 2"
diff --git a/share/doc/iso/wiscman/rvd.4p b/share/doc/iso/wiscman/rvd.4p
new file mode 100644
index 0000000..44d7e84
--- /dev/null
+++ b/share/doc/iso/wiscman/rvd.4p
@@ -0,0 +1,90 @@
+.\"
+.\" 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1986,1987,1988
+.\" LICENSED MATERIALS - PROPERTY OF IBM
+.\" REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
+.\"
+.\"$Header:rvd.4p_ca 11.0$
+.\"$ACIS:rvd.4p_ca 11.0$
+.\"$Source: /ibm/acis/usr/man/man4/RCS/rvd.4p_ca,v $
+.\"This file contains -man macros.
+.TH RVD 4P "July 1987" "Space overwritten by .AC macro" " "
+.AC 1 0
+.SH NAME
+rvd \- Remote Virtual Disk protocol
+.SH DESCRIPTION
+RVD
+is a network service which allows several
+physical machines to share one
+physical mass storage device such as a hard disk. The basic
+concept is to have the machine to which the device is physically attached
+act as a server to read and write blocks for all the other
+machines desiring use of the resource.
+.PP
+The server program apportions the
+physical blocks into \*(lqvirtual disk packs\*(rq based on a
+table maintained with
+.IR vddb (8).
+The packs can then be used separately by clients. There are
+three modes of use: read-only, shared, and exclusive.
+Exclusive mode is used for
+read-write access, while read-only mode is as it sounds.
+Shared mode is not supported under IBM/4.3.
+If a disk pack is \*(lqspun up\*(rq in read-only mode,
+several clients may share the pack and read its information. In
+exclusive mode, one client has exclusive use of the disk pack.
+.PP
+Packs are \*(lqspun up\*(rq and \*(lqspun down\*(rq with the
+.I up
+and
+.I down
+commands (see
+.IR up (1)).
+This can be done
+at reboot time within
+.I /etc/rc.local
+(see
+.IR rc (8))
+or
+at login time within
+.I ~/.login
+(see
+.IR csh (1)).
+Once a pack is spun up, it behaves like a disk physically attached to
+the local machine (excepting network latency).
+The client can do anything desired with the pack;
+both MS-DOS and UNIX operating system file systems have
+been used on the same physical
+drive at the same time (on separate packs, of course).
+.PP
+RVD
+is implemented in two parts: server code and client code. The server
+code is written as a
+.IR "user process" ,
+i.e. it does not require any special
+privileges beyond read/write access to the disks it manages. The server
+opens a network socket and listens for UDP connections. It also accepts
+all
+RVD
+packets and acts on them.
+RVD is a protocol different from both
+UDP and TCP,
+although similar in nature to the former.
+.PP
+The client code is implemented as a pseudo-device and corresponding
+device driver in the kernel. It can handle up to 10
+remote virtual disks
+simultaneously, which are associated with the pseudo-devices below.
+.SH FILES
+.DT
+/dev/vd[0-9]a block special file pseudo-device
+.br
+/dev/rvd[0-9]a character special file pseudo-device
+.SH "SEE ALSO"
+up(1), rvddb(5), rvdtab(5),
+rvdflush(8), rvdchlog(8), rvddown(8),
+rvdexch(8), rvdflush(8), rvdlog(8), rvdsend(8), rvdshow(8),
+rvdshut(8), rvdsrv(8), savervd(8),
+spinup(8), vddb(8), vdstats(8)
+.br
+``The Remote Virtual Disk System'' in Volume II, Supplementary Documents
+
diff --git a/share/doc/iso/wiscman/tp.4p b/share/doc/iso/wiscman/tp.4p
new file mode 100644
index 0000000..64dbf2f
--- /dev/null
+++ b/share/doc/iso/wiscman/tp.4p
@@ -0,0 +1,609 @@
+.TH TP 4P "9 December 1988"
+.ds ]W Wisconsin ARGO 1.0
+.UC 4
+.SH NAME
+TP \- ISO Transport Protocol
+.SH SYNOPSIS
+.nf
+\fB#include <sys/socket.h>\fR
+\fB#include <netargo/iso_errno.h>\fR
+\fB#include <netargo/tp_param.h>\fR
+\fB#include <netargo/tp_user.h>\fR
+.PP
+\fBs = socket( [ AF_INET, AF_ISO ] , SOCK_SEQPACKET, 0);\fR
+.SH DESCRIPTION
+.PP
+The ISO Transport Protocol implemented for AOS R2
+at the University of Wisconsin - Madison
+includes classes 0 and 4
+of the ISO transport protocols
+as specified in
+the September 1984 version of DIS 8073.
+Class 4 of the protocol provides reliable, sequenced,
+flow-controlled, two-way
+transmission of data packets with an alternate stop-and-wait data path called
+the "expedited data" service.
+Class 0 is essentially a null transport protocol, which is used
+when the underlying network service provides reliable, sequenced,
+flow-controlled, two-way data transmission.
+Class 0 does not provide the expedited data service.
+The protocols are implemented as a single transport layer entity
+that coexists with the Internet protocol suite.
+Class 0 may be used only in the ISO domain.
+Class 4 may be used in the Internet domain as well as in the ISO domain.
+.PP
+The user interface to this protocol is the Berkeley interprocess communication
+interface (see \fIsocket(2)\fR.)
+Two new system calls for sending and receiving
+were added to this interface
+to support the end-of-transport-service-data-unit (EOTSDU)
+indication.
+See \fIsendv(2)\fR and \fIrecvv(2)\fR.
+If the EOTSDU is not needed, the Berkeley 4.3BSD interface
+can be used.
+See \fIsend(2)\fR and \fIrecv(2)\fR.
+.PP
+Through the
+\fIgetsockopt\fR and \fIsetsockopt\fR
+system calls,
+TP supports several options
+to control such things as negotiable options
+in the protocol and protocol strategies.
+.\"These system calls are used
+.\"to submit data for inclusion on connect and disconnect TPDUs.
+The options are defined in \fB<netargo/tp_user.h>\fR,
+and are described below.
+.\".PP
+.\"The options marked with a percent sign ( \fB%\fR )
+.\"are limited to use by the super-user.
+.PP
+In the tables below,
+the options marked with a pound sign ( \fB#\fR )
+may be used
+with \fIsetsockopt()\fR
+after a connection is established.
+Others must be used before the connection
+is established, in other words,
+before calling
+\fIconnect()\fR or
+\fIaccept()\fR.
+All options may be used
+with \fIgetsockopt()\fR
+before or
+after a connection is established.
+.\"
+.\" .PP
+.\" The options marked with an exclamation point ( \fB!\fR )
+.\" may be used after a connection is released,
+.\" but before
+.\" the TP reference timer (which generally
+.\" has a value in minutes) expires, and before
+.\" a \fIclose()\fR system call.
+.\" In other words, these commands may be used when the peer closes
+.\" a connection (possibly causing a disconnect indication), as long as the command
+.\" is issued "soon" after the disconnection occurred.
+.\" Disconnect data may be sent by the side initiating the close
+.\" but not by the passive side ("passive" with respect to the closing
+.\" of the connection), so there is no need to read disconnect data
+.\" after calling \fIclose()\fR.
+.\" .PP
+.\" The implementation of data on connect and disconnect is incomplete
+.\" and is not supported.
+.sp 1
+.TP 25
+\fBName\fR
+\fBValue [default]\fR
+.IP
+\fBDescription\fR
+.\".TP 25
+.\"TPOPT_CONN_DATA
+.\"(char *) [none]
+.\".IP
+.\"Data to send on \fIconnect()\fR.
+.\".TP 25
+.\"TPOPT_DISC_DATA\fB # !\fR
+.\"(char *) [none]
+.\".IP
+.\"Data to send on \fIclose()\fR.
+.\".TP 25
+.\"TPOPT_CDDATA_CLEAR\fB #\fR
+.\"No associated value.
+.\".IP
+.\"Erase outgoing connect or disconnect data.
+.TP 25
+TPOPT_MY_TSEL\fB \fR
+1-64 bytes.
+.IP
+An "extended" transport selector (tsel) for this socket.
+This option is used to set or get the local tsel.
+When this option is used to set a tsel,
+the default 2-byte tsel
+that may have been allocated by \fIbind()\fR
+is retained, but this "extended" tsel is the
+tsel that is transmitted in a connection request.
+When this option is used to get a tsel,
+it will return whatever transport tsel exists;
+if no "extended" tsel was given to this socket,
+the 2-byte tsel is returned.
+.TP 25
+TPOPT_PEER_TSEL\fB \fR
+1-64 bytes.
+.IP
+An "extended" transport selector (tsel) for the
+peer transport entity.
+This option is used to get the peer's tsel after
+a connection is established.
+When used before a connection
+is established, this option can set the tsel that
+will be transmitted as the "called" tsel
+in a connection request.
+.TP 25
+TPOPT_PERF_MEAS\fB #\fR
+Boolean.
+.IP
+When \fBtrue\fR, performance measurements will be kept
+for this connection.
+When set before a connection is established, the
+active side will use a locally defined parameter on the
+connect request packet; if the peer is another ARGO
+implementation, this will cause performance measurement to be
+turned on
+on the passive side as well.
+See \fItpperf(8)\fR.
+.TP 25
+TPOPT_PSTATISTICS\fB\fR
+No associated value on input.
+On output, struct tp_pmeas.
+.IP
+This command is used to read the performance statistics accumulated
+during a connection's lifetime.
+It can only be used with \fIgetsockopt()\fR.
+The structure it returns is described in \fB<netargo/tp_stat.h>\fR.
+See \fItpperf(8)\fR.
+.TP 25
+TPOPT_FLAGS
+unsigned integer. [ 0x0 ]
+.IP
+This command can only be used with \fIgetsockopt()\fR.
+See the description of the flags below.
+.TP 25
+TPOPT_PARAMS\fB\fR
+struct tp_conn_param.
+.IP
+Used to get or set a group of parameters for a connection.
+The struct tp_conn_param is the argument used with the
+\fIgetsockopt()\fR or \fIsetsockopt()\fR system call.
+It is described in
+\fB<netargo/tp_user.h>\fR.
+.PP
+The fields of the \fItp_conn_param\fR structure are
+described below.
+.nf
+.sp 1
+\fIValues for TPOPT_PARAMS:\fR
+.fi
+.TP 25
+\fBField\fR
+\fBValue [default]\fR
+.IP
+\fBDescription\fR
+.\" ******************8
+.TP 25
+p_Nretrans
+nonzero short integer [ 1 ]
+.IP
+Number of times a TPDU will be retransmitted before the
+local TP entity closes a connection.
+.\" ******************8
+.TP 25
+p_dr_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between retransmissions of disconnect request TPDUs.
+.\" ******************8
+.TP 25
+p_dt_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between retransmissions of data TPDUs.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_cr_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between retransmissions of connection request TPDUs.
+.\" ******************8
+.TP 25
+p_cc_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between retransmissions of connection confirm TPDUs.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_x_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between retransmissions of expedited data TPDUs.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_sendack_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks that the local TP entity
+will wait before sending an acknowledgment for normal data
+(not applicable if the acknowledgment strategy is TPACK_EACH).
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_ref_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks for which a reference will
+be considered frozen after the connection to which
+it applied is closed.
+This parameter applies to classes 4 and 0 in the
+ARGO implementation, despite the fact that
+the frozen reference function is required only for
+class 4.
+.\" ******************8
+.TP 25
+p_inact_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks without an incoming packet from the peer after which
+TP closes the connection.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_keepalive_ticks
+nonzero short integer [ various ]
+.IP
+Number of clock ticks between
+acknowledgments that are sent
+to keep an inactive connection open (to prevent the peer's
+inactivity control function from closing the connection).
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_winsize
+short integer between 128 and 16384. [4096 bytes]
+.IP
+The buffer space limits in bytes for incoming and outgoing data.
+There is no way to specify different limits for incoming and outgoing
+paths.
+The actual window size at any time
+during the lifetime of a connection
+is a function of the buffer size limit, the negotiated
+maximum TPDU size, and the
+rate at which the user program receives data.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_tpdusize
+unsigned char between 0x7 and 0xd.
+[ 0xc for class 4 ] [ 0xb for class 0 ]
+.IP
+Log 2 of the maximum TPDU size to be negotiated.
+The TP standard (ISO 8073) gives an upper bound of
+0xd for class 4 and 0xb for class 0.
+The ARGO implementation places upper bounds of
+0xc on class 4 and 0xb on class 0.
+.\" ******************8
+.TP 25
+p_ack_strat
+TPACK_EACH or TPACK_WINDOW. [ TPACK_WINDOW ]
+.IP
+This parameter applies only to class 4.
+Two acknowledgment strategies are supported:
+.IP
+TPACK_EACH means that each data TPDU is acknowledged
+with an AK TPDU.
+.IP
+TPACK_WINDOW
+means that upon receipt of the packet that represents
+the high edge of the last window advertised, an AK TPDU is generated.
+.\" ******************8
+.TP 25
+p_rx_strat
+4 bit mask
+[ TPRX_USE_CW | TPRX_FASTSTART over
+connectionless network protocols ]
+[ TPRX_USE_CW over
+connection-oriented network protocols ]
+.IP
+This parameter applies only to class 4.
+The bit mask may include the following values:
+.IP
+TPRX_EACH: When a retransmission timer expires, retransmit
+each packet in the send window rather than
+just the first unacknowledged packet.
+.IP
+TPRX_USE_CW: Use a "congestion window" strategy borrowed
+from Van Jacobson's congestion window strategy for TCP.
+The congestion window size is set to one whenever
+a retransmission occurs.
+.IP
+TPRX_FASTSTART: Begin sending the maximum amount of data permitted
+by the peer (subject to availability).
+The alternative is to start sending slowly by
+pretending the peer's window is smaller than it is, and letting
+it slowly grow up to the real peer's window size.
+This is to smooth the effect of new connections on a congested network
+by preventing a transport connection from suddenly
+overloading the network with a burst of packets.
+This strategy is also due to Van Jacobson.
+.\" ******************8
+.TP 25
+p_class
+5 bit mask
+[ TP_CLASS_4 | TP_CLASS_0 ]
+.IP
+Bit mask including one or both of the values TP_CLASS_4 and TP_CLASS_0.
+The higher class indicated is the preferred class.
+If only one class is indicated, negotiation will not occur
+during connection establishment.
+.\" ******************8
+.TP 25
+p_xtd_format
+Boolean.
+[ false ]
+.IP
+Boolean indicating that extended format shall be negotiated.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_xpd_service
+Boolean.
+[ true ]
+.IP
+Boolean indicating that
+the expedited data transport service will be negotiated.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_use_checksum
+Boolean.
+[ true ]
+.IP
+Boolean indicating that the use of checksums will be negotiated.
+This parameter applies only to class 4.
+.\" ******************8
+.TP 25
+p_use_nxpd
+Reserved for future use.
+.\" ******************8
+.TP 25
+p_use_rcc
+Reserved for future use.
+.\" ******************8
+.TP 25
+p_use_efc
+Reserved for future use.
+.\" ******************8
+.TP 25
+p_no_disc_indications
+Boolean.
+[ false ]
+.IP
+Boolean indicating that the local TP entity shall not issue
+indications (signals) when a TP connection is disconnected.
+.\" ******************8
+.TP 25
+p_dont_change_params
+Boolean.
+[ false ]
+.IP
+If \fBtrue\fR the TP entity will not override
+any of the other values given in this structure.
+If the values cannot be used, the TP entity will drop, disconnect,
+or refuse to establish the connection to which this structure pertains.
+.\" ******************8
+.TP 25
+p_netservice
+One of { ISO_CLNS, ISO_CONS, ISO_COSNS, IN_CLNS }.
+[ ISO_CLNS ]
+.IP
+Indicates which network service is to be used.
+.IP
+ISO_CLNS indicates the connectionless network service provided
+by CLNP (ISO 8473).
+.IP
+ISO_CONS indicates the connection-oriented network service provided
+by X.25 (ISO 8208) and ISO 8878.
+.IP
+ISO_COSNS indicates the
+connectionless network service running over a
+connection-oriented subnetwork service : CLNP (ISO 8473) over X.25 (ISO 8208).
+.IP
+IN_CLNS indicates the
+DARPA Internet connectionless network service provided by IP (RFC 791).
+.\" ******************8
+.TP 25
+p_dummy
+Reserved for future use.
+.sp 1
+.PP
+The TPOPT_FLAGS option is used for obtaining
+various boolean-valued options.
+Its meaning is as follows.
+The bit numbering used is that of the PC/RT, which means that bit
+0 is the most significant bit, while bit 8 is the least significant bit.
+.nf
+.sp 1
+\fIValues for TPOPT_FLAGS:\fR
+.fi
+.TP 10
+\fBBits\fR
+\fBDescription [Default]\fR
+.TP 10
+0
+TPFLAG_NLQOS_PDN : set when the quality of the
+network service is
+similar to that of a public data network.
+.TP 10
+1
+TPFLAG_PEER_ON_SAMENET : set when the peer TP entity
+is considered to be on the same network as the local
+TP entity.
+.TP 10
+2
+Not used.
+.TP 10
+3
+TPFLAG_XPD_PRES : set when expedited data are present
+[ 0 ]
+.TP 10
+4..7
+Reserved.
+.\".TP 10
+.\"4
+.\"Reserved.
+.\".TP 10
+.\"5
+.\"TPFLAG_DISC_DATA_IN : read only flag, if set indicates that
+.\"data from a disconnect TPDU are present.
+.\".TP 10
+.\"6
+.\"Reserved.
+.\".TP 10
+.\"7
+.\"TPFLAG_CONN_DATA_IN : read only flag, if set indicates that
+.\"data from a connect TPDU are present.
+.SH "LIBRARIES
+.PP
+The new system calls \fIrecvv\fR and \fIsendv\fR are supported by a
+library, \fIlibtp.a\fR (rather than a modified C library).
+Also in this
+library are new optional interfaces to the \fIconnect\fR and \fIaccept\fR
+system calls. See LIBTP(3).
+.SH FILES
+.PP
+The following files have entries necessary for the correct operation
+of the TP utilities.
+.nf
+\fC
+ /etc/isodir
+ /etc/protocols
+\fR
+.fi
+.PP
+The symbolic link is needed for users to write programs using IPC
+with TP:
+.nf
+\fC
+ /usr/include/netargo@ -> /sys/netargo
+\fR
+.fi
+.PP
+The following utilities have changed:
+.nf
+ netstat
+ ifconfig
+ config
+.fi
+.PP
+The following are new utilities and daemons:
+.nf
+ isoroute
+ rlogin.iso, rcp.iso, rsh.iso, isod, rlogind
+ tpdiscard
+ tpping
+ tppt (for maintenance and debugging)
+ bark (for maintenance and debugging)
+ tpfileget, tpfileput (for debugging)
+ tpstat, tpmon
+ tpset
+ tppkt
+ viid
+.fi
+.PP
+In the kernel source, many files have changed or been added.
+For a list of these, see the installation guide,
+"Installing Wisconsin ARGO 1.0 on Academic Operating System 4.3
+Release 2".
+.SH "ERROR VALUES
+.PP
+The TP entity returns \fIerrno\fR error values as defined in
+\fB<sys/errno.h>\fR
+and
+\fB<netargo/iso_errno.h>\fR.
+User programs may print messages associated with these values by
+using an expanded version of \fIperror()\fR
+found in the ISO library, \fIlibisodir.a\fR.
+.PP
+If the TP entity encounters asynchronous events
+that will cause a transport connection to be closed,
+such as
+timing out while retransmitting a connect request TPDU,
+or receiving a DR TPDU,
+the TP entity issues a SIGURG signal, indicating that
+disconnection has occurred.
+If the signal is issued during a
+system call, the system call may be interrupted,
+in which case the
+\fIerrno\fR value upon return from the system call is EINTR.
+If the signal SIGURG
+is being handled by reading
+from the socket, and it was an \fIaccept()\fR that
+timed out, the read may result in ENOTSOCK,
+because the \fIaccept()\fR call had not yet returned a
+legitimate socket descriptor when the signal was handled.
+ETIMEDOUT (or some other errno value appropriate to the
+type of error) is returned if SIGURG is blocked
+for the duration of the system call.
+A user program should take one of the following approaches:
+.IP "Block SIGURG." 5
+If the program is servicing
+only one connection, it can block or ignore SIGURG during connection
+establishment.
+The advantage of this is that the \fIerrno\fR value
+returned is somewhat meaningful.
+The disadvantage of this is that
+if ignored, disconnection and expedited data indications could be
+missed.
+For some programs this is not a problem.
+.IP "Handle SIGURG." 5
+If the program is servicing more than one connection at a time
+or expedited data may arrive or both, the program must
+service SIGURG.
+It can use the \fIgetsockopt(...TPOPT_FLAGS...)\fR system
+call to see if the signal
+was due to the arrival of expedited data or due to a disconnection.
+In the latter case,
+\fIgetsockopt()\fR
+will return ENOTCONN.
+.SH BUGS
+.PP
+When running TP over the token ring, if checksumming
+is NOT used, the TP entity sends packets to the lan driver faster than
+the driver can reasonably handle them.
+A bug in the lan driver causes it to reorder the packets in this
+situation, causing an overall degradation of TP performance.
+In general, this is not a problem because very few applications
+will actually be able to send packets this fast.
+Nevertheless,
+in order to prevent this reordering,
+one may induce a delay in the TP entity by setting the 1-byte
+value
+\fItp_delay\fR
+to 1
+using the debugger.
+Hit the <pause> key, then
+type \fB/b tp_delay\fR followed by the <enter key>.
+The debugger will print the value 00.
+You then type \fB1\fR followed by the <enter key>.
+Then type \fBend\fR <enter key>.
+Then type \fBgo\fR <enter key>.
+.SH SEE ALSO
+.PP
+tcp(4P), sendv(2), recvv(2), libtp(3),
+isodir(3), isodir(5), netstat(1),
+iso(4F), clnp(4P), viid(8),
+tppt(8), tpstat(8), bark(8), tppkt(8), tpset(8), tpperf(8),
+isoroute(8), ifconfig(8), isod(8), rlogin.iso(1),
+"Installing Wisconsin ARGO 1.0 on Academic Operating System 4.3
+Release 2",
+"ARGO 1.0 Kernel Programmer's Manual"
diff --git a/share/doc/papers/beyond4.3/Makefile b/share/doc/papers/beyond4.3/Makefile
new file mode 100644
index 0000000..f474dba
--- /dev/null
+++ b/share/doc/papers/beyond4.3/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 5.2 (Berkeley) 6/8/93
+
+DIR= papers/beyond43
+SRCS= beyond43.ms
+MACROS= -ms
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/beyond4.3/beyond43.ms b/share/doc/papers/beyond4.3/beyond43.ms
new file mode 100644
index 0000000..f83e68a
--- /dev/null
+++ b/share/doc/papers/beyond4.3/beyond43.ms
@@ -0,0 +1,518 @@
+.\" Copyright (c) 1989 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)beyond43.ms 5.1 (Berkeley) 6/5/90
+.\"
+.\" *troff -ms
+.rm CM
+.sp 2
+.ce 100
+\fB\s+2Current Research by
+The Computer Systems Research Group
+of Berkeley\s-2\fP
+.ds DT "February 10, 1989
+.\" \fBDRAFT of \*(DT\fP
+.sp 2
+.nf
+Marshall Kirk McKusick
+Michael J Karels
+Keith Sklower
+Kevin Fall
+Marc Teitelbaum
+Keith Bostic
+.fi
+.sp 2
+.ce 1
+\fISummary\fP
+.ce 0
+.PP
+The release of 4.3BSD in April of 1986 addressed many of the
+performance problems and unfinished interfaces
+present in 4.2BSD [Leffler84] [McKusick85].
+The Computer Systems Research Group at Berkeley
+has now embarked on a new development phase to
+update other major components of the system, as well as to offer
+new functionality.
+There are five major ongoing projects.
+The first is to develop an OSI network protocol suite and to integrate
+existing ISO applications into Berkeley UNIX.
+The second is to develop and support an interface compliant with the
+P1003.1 POSIX standard recently approved by the IEEE.
+The third is to refine the TCP/IP networking to improve
+its performance and limit congestion on slow and/or lossy networks.
+The fourth is to provide a standard interface to file systems
+so that multiple local and remote file systems can be supported,
+much as multiple networking protocols are supported by 4.3BSD.
+The fifth is to evaluate alternate access control mechanisms and
+audit the existing security features of the system, particularly
+with respect to network services.
+Other areas of work include multi-architecture support,
+a general purpose kernel memory allocator, disk labels, and
+extensions to the 4.2BSD fast filesystem.
+.PP
+We are planning to finish implementation prototypes for each of the
+five main areas of work over the next year, and provide an informal
+test release sometime next year for interested developers.
+After incorporating feedback and refinements from the testers,
+they will appear in the next full Berkeley release, which is typically
+made about a year after the test release.
+.br
+.ne 10
+.sp 2
+.NH
+Recently Completed Projects
+.PP
+There have been several changes in the system that were included
+in the recent 4.3BSD Tahoe release.
+.NH 2
+Multi-architecture support
+.PP
+Support has been added for the DEC VAX 8600/8650, VAX 8200/8250,
+MicroVAXII and MicroVAXIII.
+.PP
+The largest change has been the incorporation of support for the first
+non-VAX processor, the CCI Power 6/32 and 6/32SX. (This addition also
+supports the
+Harris HCX-7 and HCX-9, as well as the Sperry 7000/40 and ICL machines.)
+The Power 6 version of 4.3BSD is largely based on the compilers and
+device drivers done for CCI's 4.2BSD UNIX,
+and is otherwise similar to the VAX release of 4.3BSD.
+The entire source tree, including all kernel and user-level sources,
+has been merged using a structure that will easily accommodate the addition
+of other processor families. A MIPS R2000 has been donated to us,
+making the MIPS architecture a likely candidate for inclusion into a future
+BSD release.
+.NH 2
+Kernel Memory Allocator
+.PP
+The 4.3BSD UNIX kernel used 10 different memory allocation mechanisms,
+each designed for the particular needs of the utilizing subsystem.
+These mechanisms have been replaced by a general purpose dynamic
+memory allocator that can be used by all of the kernel subsystems.
+The design of this allocator takes advantage of known memory usage
+patterns in the UNIX kernel and a hybrid strategy that is time-efficient
+for small allocations and space-efficient for large allocations.
+This allocator replaces the multiple memory allocation interfaces
+with a single easy-to-program interface,
+results in more efficient use of global memory by eliminating
+partitioned and specialized memory pools,
+and is quick enough (approximately 15 VAX instructions) that no
+performance loss is observed relative to the current implementations
+[McKusick88].
+.NH 2
+Disk Labels
+.PP
+During the work on the CCI machine,
+it became obvious that disk geometry and filesystem layout information
+must be stored on each disk in a pack label.
+Disk labels were implemented for the CCI disks and for the most common
+types of disk controllers on the VAX.
+A utility was written to create and maintain the disk information,
+and other user-level programs that use such information now obtain
+it from the disk label.
+The use of this facility has allowed improvements in the file system's
+knowledge of irregular disk geometries such as track-to-track skew.
+.NH 2
+Fat Fast File System
+.PP
+The 4.2 fast file system [McKusick84]
+contained several statically sized structures,
+imposing limits on the number of cylinders per cylinder group,
+inodes per cylinder group,
+and number of distinguished rotational positions.
+The new ``fat'' filesystem allows these limits to be set at filesystem
+creation time.
+Old kernels will treat the new filesystems as read-only,
+and new kernels
+will accommodate both formats.
+The filesystem check facility, \fCfsck\fP, has also been modified to check
+either type.
+.br
+.ne 10
+.sp 2
+.NH
+Current UNIX Research at Berkeley
+.PP
+Since the release of 4.3BSD in mid 1986,
+we have begun work on several new major areas of research.
+Our goal is to apply leading edge research ideas into a stable
+and reliable implementation that solves current problems in
+operating systems development.
+.NH 2
+OSI network protocol development
+.PP
+The network architecture of 4.2BSD was designed to accommodate
+multiple network protocol families and address formats,
+and an implementation of the ISO OSI network protocols
+should enter into this framework without much difficulty.
+We plan to
+implement the OSI connectionless internet protocol (CLNP),
+and device drivers for X.25, 802.3, and possibly 802.5 interfaces, and
+to integrate these with an OSI transport class 4 (TP-4) implementation.
+We will also incorporate into the Berkeley Software Distribution an
+updated ISO Development Environment (ISODE)
+featuring International Standard (IS) versions of utilities.
+ISODE implements the session and presentation layers of the OSI protocol suite,
+and will include an implementation of the file transfer protocol (FTAM).
+It is also possible that an X.400 implementation now being done at
+University College, London and the University of Nottingham
+will be available for testing and distribution.
+.LP
+This implementation is comprised of four areas.
+.IP 1)
+We are updating the University of
+Wisconsin TP-4 to match GOSIP requirements.
+The University of Wisconsin developed a transport class 4
+implementation for the 4.2BSD kernel under contract to Mitre.
+This implementation must be updated to reflect the National Institute
+of Standards and Technology (NIST, formerly NBS) workshop agreements,
+GOSIP, and 4.3BSD requirements.
+We will make this TP-4 operate with an OSI IP,
+as the original implementation was built to run over the DoD IP.
+.IP 2)
+A kernel version of the OSI IP and ES-IS protocols must be produced.
+We will implement the kernel version of these protocols.
+.IP 3)
+The required device drivers need to be integrated into a BSD kernel.
+4.3BSD has existing device drivers for many ethernet devices; future
+BSD versions may also support X.25 devices as well as token ring
+networks.
+These device drivers must be integrated
+into the kernel OSI protocol implementations.
+.IP 4)
+The existing OSINET interoperability test network is available so
+that the interoperability of the ISODE and BSD kernel protocols
+can be established through tests with several vendors.
+Testing is crucial because an openly available version of GOSIP protocols
+that does not interoperate with DEC, IBM, SUN, ICL, HIS, and other
+major vendors would be embarrassing.
+To allow testing of the integrated pieces the most desirable
+approach is to provide access to OSINET at UCB.
+A second approach is to do the interoperability testing at
+the site of an existing OSINET member, such as the NBS.
+.NH 2
+Compliance with POSIX 1003
+.PP
+Berkeley became involved several months ago in the development
+of the IEEE POSIX P1003.1 system interface standard.
+Since then, we have been participating in the working groups
+of P1003.2 (shell and application utility interface),
+P1003.6 (security), P1003.7 (system administration), and P1003.8
+(networking).
+.PP
+The IEEE published the POSIX P1003.1 standard in late 1988.
+POSIX related changes to the BSD system have included a new terminal
+driver, support for POSIX sessions and job control, expanded signal
+functionality, restructured directory access routines, and new set-user
+and set-group id facilities.
+We currently have a prototype implementation of the
+POSIX driver with extensions to provide binary compatibility with
+applications developed for the old Berkeley terminal driver.
+We also have a prototype implementation of the 4.2BSD-based POSIX
+job control facility.
+.PP
+The P1003.2 draft is currently being voted on by the IEEE
+P1003.2 balloting group.
+Berkeley is particularly interested in the results of this standard,
+as it will profoundly influence the user environment.
+The other groups are in comparatively early phases, with drafts
+coming to ballot sometime in the 90's.
+Berkeley will continue to participate in these groups, and
+move in the near future toward a P1003.1 and P1003.2 compliant
+system.
+We have many of the utilities outlined in the current P1003.2 draft
+already implemented, and have other parties willing to contribute
+additional implementations.
+.NH 2
+Improvements to the TCP/IP Networking Protocols
+.PP
+The Internet and the Berkeley collection of local-area networks
+have both grown at high rates in the last year.
+The Bay Area Regional Research Network (BARRNet),
+connecting several UC campuses, Stanford and NASA-Ames,
+has recently become operational, increasing the complexity
+of the network connectivity.
+Both Internet and local routing algorithms are showing the strain
+of continued growth.
+We have made several changes in the local routing algorithm
+to keep accommodating the current topology,
+and are participating in the development of new routing algorithms
+and standard protocols.
+.PP
+Recent work in collaboration with Van Jacobson of the Lawrence Berkeley
+Laboratory has led to the design and implementation of several new algorithms
+for TCP that improve throughput on both local and long-haul networks
+while reducing unnecessary retransmission.
+The improvement is especially striking when connections must traverse
+slow and/or lossy networks.
+The new algorithms include ``slow-start,''
+a technique for opening the TCP flow control window slowly
+and using the returning stream of acknowledgements as a clock
+to drive the connection at the highest speed tolerated by the intervening
+network.
+A modification of this technique allows the sender to dynamically modify
+the send window size to adjust to changing network conditions.
+In addition, the round-trip timer has been modified to estimate the variance
+in round-trip time, thus allowing earlier retransmission of lost packets
+with less spurious retransmission due to increasing network delay.
+Along with a scheme proposed by Phil Karn of Bellcore,
+these changes reduce unnecessary retransmission over difficult paths
+such as Satnet by nearly two orders of magnitude
+while improving throughput dramatically.
+.PP
+The current TCP implementation is now being readied
+for more widespread distribution via the network and as a
+standard Berkeley distribution unencumbered by any commercial licensing.
+We are continuing to refine the TCP and IP implementations
+using the ARPANET, BARRNet, the NSF network
+and local campus nets as testbeds.
+In addition, we are incorporating applicable algorithms from this work
+into the TP-4 protocol implementation.
+.NH 2
+Toward a Compatible File System Interface
+.PP
+The most critical shortcoming of the 4.3BSD UNIX system was in the
+area of distributed file systems.
+As with networking protocols,
+there is no single distributed file system
+that provides sufficient speed and functionality for all problems.
+It is frequently necessary to support several different remote
+file system protocols, just as it is necessary to run several
+different network protocols.
+.PP
+As network or remote file systems have been implemented for UNIX,
+several stylized interfaces between the file system implementation
+and the rest of the kernel have been developed.
+Among these are Sun Microsystems' Virtual File System interface (VFS)
+using \fBvnodes\fP [Sandberg85] [Kleiman86],
+Digital Equipment's Generic File System (GFS) architecture [Rodriguez86],
+AT&T's File System Switch (FSS) [Rifkin86],
+the LOCUS distributed file system [Walker85],
+and Masscomp's extended file system [Cole85].
+Other remote file systems have been implemented in research or
+university groups for internal use,
+notably the network file system in the Eighth Edition UNIX
+system [Weinberger84] and two different file systems used at Carnegie Mellon
+University [Satyanarayanan85].
+Numerous other remote file access methods have been devised for use
+within individual UNIX processes,
+many of them by modifications to the C I/O library
+similar to those in the Newcastle Connection [Brownbridge82].
+.PP
+Each design attempts to isolate file system-dependent details
+below a generic interface and to provide a framework within which
+new file systems may be incorporated.
+However, each of these interfaces is different from
+and incompatible with the others.
+Each addresses somewhat different design goals,
+having been based on a different version of UNIX,
+having targeted a different set of file systems with varying characteristics,
+and having selected a different set of file system primitive operations.
+.PP
+Our effort in this area is aimed at providing a common framework to
+support these different distributed file systems simultaneously rather than to
+simply implement yet another protocol.
+This requires a detailed study of the existing protocols,
+and discussion with their implementors to determine whether
+they could modify their implementation to fit within our proposed
+framework. We have studied the various file system interfaces to determine
+their generality, completeness, robustness, efficiency, and aesthetics
+and are currently working on a file system interface
+that we believe includes the best features of
+each of the existing implementations.
+This work and the rationale underlying its development
+have been presented to major software vendors as an early step
+toward convergence on a standard compatible file system interface.
+Briefly, the proposal adopts the 4.3BSD calling convention for file
+name lookup but otherwise is closely related to Sun's VFS
+and DEC's GFS [Karels86].
+.NH 2
+System Security
+.PP
+The recent invasion of the DARPA Internet by a quickly reproducing ``worm''
+highlighted the need for a thorough review of the access
+safeguards built into the system.
+Until now, we have taken a passive approach to dealing with
+weaknesses in the system access mechanisms, rather than actively
+searching for possible weaknesses.
+When we are notified of a problem or loophole in a system utility
+by one of our users,
+we have a well defined procedure for fixing the problem and
+expeditiously disseminating the fix to the BSD mailing list.
+This procedure has proven itself to be effective in
+solving known problems as they arise
+(witness its success in handling the recent worm).
+However, we feel that it would be useful to take a more active
+role in identifying problems before they are reported (or exploited).
+We will make a complete audit of the system
+utilities and network servers to find unintended system access mechanisms.
+.PP
+As a part of the work to make the system more resistant to attack
+from local users or via the network, it will be necessary to produce
+additional documentation on the configuration and operation of the system.
+This documentation will cover such topics as file and directory ownership
+and access, network and server configuration,
+and control of privileged operations such as file system backups.
+.PP
+We are investigating the addition of access control lists (ACLs) for
+filesystem objects.
+ACLs provide a much finer granularity of control over file access permissions
+than the current
+discretionary access control mechanism (mode bits).
+Furthermore, they are necessary
+in environments where C2 level security or better, as defined in the DoD
+TCSEC [DoD83], is required.
+The POSIX P1003.6 security group has made notable progress in determining
+how an ACL mechanism should work, and several vendors have implemented
+ACLs for their commercial systems.
+Berkeley will investigate the existing implementations and determine
+how to best integrate ACLs with the existing mechanism.
+.PP
+A major shortcoming of the present system is that authentication
+over the network is based solely on the privileged port mechanism
+between trusting hosts and users.
+Although privileged ports can only be created by processes running as root
+on a UNIX system,
+such processes are easy for a workstation user to obtain;
+they simply reboot their workstation in single user mode.
+Thus, a better authentication mechanism is needed.
+At present, we believe that the MIT Kerberos authentication
+server [Steiner88] provides the best solution to this problem.
+We propose to investigate Kerberos further as well as other
+authentication mechanisms and then to integrate
+the best one into Berkeley UNIX.
+Part of this integration would be the addition of the
+authentication mechanism into utilities such as
+telnet, login, remote shell, etc.
+We will add support for telnet (eventually replacing rlogin),
+the X window system, and the mail system within an authentication
+domain (a Kerberos \fIrealm\fP).
+We hope to replace the existing password authentication on each host
+with the network authentication system.
+.NH
+References
+.sp
+.IP Brownbridge82
+Brownbridge, D.R., L.F. Marshall, B. Randell,
+``The Newcastle Connection, or UNIXes of the World Unite!,''
+\fISoftware\- Practice and Experience\fP, Vol. 12, pp. 1147-1162, 1982.
+.sp
+.IP Cole85
+.br
+Cole, C.T., P.B. Flinn, A.B. Atlas,
+``An Implementation of an Extended File System for UNIX,''
+\fIUsenix Conference Proceedings\fP,
+pp. 131-150, June, 1985.
+.sp
+.IP DoD83
+.br
+Department of Defense,
+``Trusted Computer System Evaluation Criteria,''
+\fICSC-STD-001-83\fP,
+DoD Computer Security Center, August, 1983.
+.sp
+.IP Karels86
+Karels, M., M. McKusick,
+``Towards a Compatible File System Interface,''
+\fIProceedings of the European UNIX Users Group Meeting\fP,
+Manchester, England, pp. 481-496, September 1986.
+.sp
+.IP Kleiman86
+Kleiman, S.,
+``Vnodes: An Architecture for Multiple File System Types in Sun UNIX,''
+\fIUsenix Conference Proceedings\fP,
+pp. 238-247, June, 1986.
+.sp
+.IP Leffler84
+Leffler, S., M.K. McKusick, M. Karels,
+``Measuring and Improving the Performance of 4.2BSD,''
+\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
+.sp
+.IP McKusick84
+McKusick, M.K., W. Joy, S. Leffler, R. Fabry,
+``A Fast File System for UNIX,''
+\fIACM Transactions on Computer Systems 2\fP, 3,
+pp. 181-197, August 1984.
+.sp
+.IP McKusick85
+McKusick, M.K., M. Karels, S. Leffler,
+``Performance Improvements and Functional Enhancements in 4.3BSD,''
+\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
+.sp
+.IP McKusick86
+McKusick, M.K., M. Karels,
+``A New Virtual Memory Implementation for Berkeley UNIX,''
+\fIProceedings of the European UNIX Users Group Meeting\fP,
+Manchester, England, pp. 451-460, September 1986.
+.sp
+.IP McKusick88
+McKusick, M.K., M. Karels,
+``Design of a General Purpose Memory Allocator for the 4.3BSD UNIX Kernel,''
+\fIUsenix Conference Proceedings\fP,
+pp. 295-303, June, 1988.
+.sp
+.IP Rifkin86
+Rifkin, A.P., M.P. Forbes, R.L. Hamilton, M. Sabrio, S. Shah, K. Yueh,
+``RFS Architectural Overview,'' \fIUsenix Conference Proceedings\fP,
+pp. 248-259, June, 1986.
+.sp
+.IP Rodriguez86
+Rodriguez, R., M. Koehler, R. Hyde,
+``The Generic File System,''
+\fIUsenix Conference Proceedings\fP,
+pp. 260-269, June, 1986.
+.sp
+.IP Sandberg85
+Sandberg, R., D. Goldberg, S. Kleiman, D. Walsh, B. Lyon,
+``Design and Implementation of the Sun Network File System,''
+\fIUsenix Conference Proceedings\fP,
+pp. 119-130, June, 1985.
+.sp
+.IP Satyanarayanan85
+Satyanarayanan, M., \fIet al.\fP,
+``The ITC Distributed File System: Principles and Design,''
+\fIProc. 10th Symposium on Operating Systems Principles\fP, pp. 35-50,
+ACM, December, 1985.
+.sp
+.IP Steiner88
+Steiner, J., C. Newman, J. Schiller,
+``\fIKerberos:\fP An Authentication Service for Open Network Systems,''
+\fIUsenix Conference Proceedings\fP, pp. 191-202, February, 1988.
+.sp
+.IP Walker85
+Walker, B.J. and S.H. Kiser, ``The LOCUS Distributed File System,''
+\fIThe LOCUS Distributed System Architecture\fP,
+G.J. Popek and B.J. Walker, ed., The MIT Press, Cambridge, MA, 1985.
+.sp
+.IP Weinberger84
+Weinberger, P.J., ``The Version 8 Network File System,''
+\fIUsenix Conference presentation\fP,
+June, 1984.
diff --git a/share/doc/papers/diskperf/Makefile b/share/doc/papers/diskperf/Makefile
new file mode 100644
index 0000000..4370f81
--- /dev/null
+++ b/share/doc/papers/diskperf/Makefile
@@ -0,0 +1,11 @@
+# @(#)Makefile 6.3 (Berkeley) 6/8/93
+
+DIR= papers/diskperf
+SRCS= abs.ms motivation.ms equip.ms methodology.ms tests.ms results.ms \
+ conclusions.ms appendix.ms
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/diskperf/abs.ms b/share/doc/papers/diskperf/abs.ms
new file mode 100644
index 0000000..a61104d
--- /dev/null
+++ b/share/doc/papers/diskperf/abs.ms
@@ -0,0 +1,176 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)abs.ms 6.2 (Berkeley) 4/16/91
+.\"
+.if n .ND
+.TL
+Performance Effects of Disk Subsystem Choices
+for VAX\(dg Systems Running 4.2BSD UNIX*
+.sp
+Revised July 27, 1983
+.AU
+Bob Kridle
+.AI
+mt Xinu
+2560 9th Street
+Suite #312
+Berkeley, California 94710
+.AU
+Marshall Kirk McKusick\(dd
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, CA 94720
+.AB
+.FS
+\(dgVAX, UNIBUS, and MASSBUS are trademarks of Digital Equipment Corporation.
+.FE
+.FS
+* UNIX is a trademark of Bell Laboratories.
+.FE
+.FS
+\(ddThis work was supported under grants from
+the National Science Foundation under grant MCS80-05144,
+and the Defense Advanced Research Projects Agency (DoD) under
+Arpa Order No. 4031 monitored by Naval Electronic System Command under
+Contract No. N00039-82-C-0235.
+.FE
+Measurements were made of the UNIX file system
+throughput for various I/O operations using the most attractive currently
+available Winchester disks and controllers attached to both the
+native busses (SBI/CMI) and the UNIBUS on both VAX 11/780s and VAX 11/750s.
+The tests were designed to highlight the performance of single
+and dual drive subsystems operating in the 4.2BSD
+.I
+fast file system
+.R
+environment.
+Many of the results of the tests were initially counter-intuitive
+and revealed several important aspects of the VAX implementations
+which were surprising to us.
+.PP
+The hardware used included two Fujitsu 2351A
+``Eagle''
+disk drives on each of two foreign-vendor disk controllers
+and two DEC RA-81 disk drives on a DEC UDA-50 disk controller.
+The foreign-vendor controllers were Emulex SC750, SC780
+and Systems Industries 9900 native bus interfaced controllers.
+The DEC UDA-50 controller is a UNIBUS interfaced, heavily buffered
+controller which is the first implementation of a new DEC storage
+system architecture, DSA.
+.PP
+One of the most important results of our testing was the correction
+of several timing parameters in our device handler for devices
+with an RH750/RH780 type interface and having high burst transfer
+rates.
+The correction of these parameters resulted in an increase in
+performance of over twenty percent in some cases.
+In addition, one of the controller manufacturers altered their bus
+arbitration scheme to produce another increase in throughput.
+.AE
+.LP
+.de PT
+.lt \\n(LLu
+.pc %
+.nr PN \\n%
+.tl '\\*(LH'\\*(CH'\\*(RH'
+.lt \\n(.lu
+..
+.af PN i
+.ds LH Performance
+.ds RH Contents
+.bp 1
+.\".if t .ds CF July 27, 1983
+.\".if t .ds LF CSRG TR/8
+.\".if t .ds RF Kridle, et. al.
+.ce
+.B "TABLE OF CONTENTS"
+.LP
+.sp 1
+.nf
+.B "1. Motivation"
+.LP
+.sp .5v
+.nf
+.B "2. Equipment
+2.1. DEC UDA50 disk controller
+2.2. Emulex SC750/SC780 disk controllers
+2.3. Systems Industries 9900 disk controller
+2.4. DEC RA81 disk drives
+2.5. Fujitsu 2351A disk drives
+.LP
+.sp .5v
+.nf
+.B "3. Methodology
+.LP
+.sp .5v
+.nf
+.B "4. Tests
+.LP
+.sp .5v
+.nf
+.B "5. Results
+.LP
+.sp .5v
+.nf
+.B "6. Conclusions
+.LP
+.sp .5v
+.nf
+.B Acknowledgements
+.LP
+.sp .5v
+.nf
+.B References
+.LP
+.sp .5v
+.nf
+.B "Appendix A
+A.1. read_8192
+A.2. write_4096
+A.3. write_8192
+A.4. rewrite_8192
+.ds RH Motivation
+.af PN 1
+.bp 1
+.de _d
+.if t .ta .6i 2.1i 2.6i
+.\" 2.94 went to 2.6, 3.64 to 3.30
+.if n .ta .84i 2.6i 3.30i
+..
+.de _f
+.if t .ta .5i 1.25i 2.5i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i
+..
diff --git a/share/doc/papers/diskperf/appendix.ms b/share/doc/papers/diskperf/appendix.ms
new file mode 100644
index 0000000..ccc4878
--- /dev/null
+++ b/share/doc/papers/diskperf/appendix.ms
@@ -0,0 +1,98 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)appendix.ms 6.2 (Berkeley) 4/16/91
+.\"
+.nr H2 1
+.ds RH Appendix A
+.SH
+\s+2Appendix A\s0
+.SH
+read_8192
+.DS
+#define BUFSIZ 8192
+main( argc, argv)
+char **argv;
+{
+ char buf[BUFSIZ];
+ int i, j;
+
+ j = open(argv[1], 0);
+ for (i = 0; i < 1024; i++)
+ read(j, buf, BUFSIZ);
+}
+.DE
+.SH
+write_4096
+.DS
+#define BUFSIZ 4096
+main( argc, argv)
+char **argv;
+{
+ char buf[BUFSIZ];
+ int i, j;
+
+ j = creat(argv[1], 0666);
+ for (i = 0; i < 2048; i++)
+ write(j, buf, BUFSIZ);
+}
+.DE
+.SH
+write_8192
+.DS
+#define BUFSIZ 8192
+main( argc, argv)
+char **argv;
+{
+ char buf[BUFSIZ];
+ int i, j;
+
+ j = creat(argv[1], 0666);
+ for (i = 0; i < 1024; i++)
+ write(j, buf, BUFSIZ);
+}
+.DE
+.bp
+.SH
+rewrite_8192
+.DS
+#define BUFSIZ 8192
+main( argc, argv)
+char **argv;
+{
+ char buf[BUFSIZ];
+ int i, j;
+
+ j = open(argv[1], 2);
+ for (i = 0; i < 1024; i++)
+ write(j, buf, BUFSIZ);
+}
+.DE
diff --git a/share/doc/papers/diskperf/conclusions.ms b/share/doc/papers/diskperf/conclusions.ms
new file mode 100644
index 0000000..5381106
--- /dev/null
+++ b/share/doc/papers/diskperf/conclusions.ms
@@ -0,0 +1,127 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)conclusions.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Conclusions
+.NH
+Conclusions
+.PP
+Peak available throughput is only one criterion
+in most storage system purchasing decisions.
+Most of the VAX UNIX systems we are familiar with
+are not I/O bandwidth constrained.
+Nevertheless, an adequate disk bandwidth is necessary for
+good performance and especially to preserve snappy
+response time.
+All of the disk systems we tested provide more than
+adequate bandwidth for typical VAX UNIX system application.
+Perhaps in some I/O-intensive applications such as
+image processing, more consideration should be given
+to the peak throughput available.
+In most situations, we feel that other factors are more
+important in making a storage choice between the systems we
+tested.
+Cost, reliability, availability, and support are some of these
+factors.
+The maturity of the technology purchased must also be weighed
+against the future value and expandability of newer technologies.
+.PP
+Two important conclusions about storage systems in general
+can be drawn from these tests.
+The first is that buffering can be effective in smoothing
+the effects of lower bus speeds and bus contention.
+Even though the UDA50 is located on the relatively slow
+UNIBUS, its performance is similar to controllers located on
+the faster processor busses.
+However, the SC780 with only one sector of buffering shows that
+little buffering is needed if the underlying bus is fast enough.
+.PP
+Placing more intelligence in the controller seems to hinder UNIX system
+performance more than it helps.
+Our profiling tests have indicated that UNIX spends about
+the same percentage of time in the SC780 driver and the UDA50 driver
+(about 10-14%).
+Normally UNIX uses a disk sort algorithm that separates reads and
+writes into two seek order queues.
+The read queue has priority over the write queue,
+since reads cause processes to block,
+while writes can be done asynchronously.
+This is particularly useful when generating large files,
+as it allows the disk allocator to read
+new disk maps and begin doing new allocations
+while the blocks allocated out of the previous map are written to disk.
+Because the UDA50 handles all block ordering,
+and because it keeps all requests in a single queue,
+there is no way to force the longer seek needed to get the next disk map.
+This dysfunction causes all the writes to be done before the disk map read,
+which idles the disk until a new set of blocks can be allocated.
+.PP
+The additional functionality of the UDA50 controller that allows it
+to transfer simultaneously from two drives tends to make
+the two drive transfer tests run much more effectively.
+Tuning for the single drive case works more effectively in the two
+drive case than when controllers that cannot handle simultaneous
+transfers are used.
+.ds RH Acknowledgements
+.nr H2 1
+.sp 1
+.SH
+\s+2Acknowledgements\s0
+.PP
+We thank Paul Massigilia and Bill Grace
+of Digital Equipment Corp for helping us run our
+disk tests on their UDA50/RA81.
+We also thank Rich Notari and Paul Ritkowski
+of Emulex for making their machines available
+to us to run our tests of the SC780/Eagles.
+Dan McKinster, then of Systems Industries,
+arranged to make their equipment available for the tests.
+We appreciate the time provided by Bob Gross, Joe Wolf, and
+Sam Leffler on their machines to refine our benchmarks.
+Finally we thank our sponsors,
+the National Science Foundation under grant MCS80-05144,
+and the Defense Advanced Research Projects Agency (DoD) under
+Arpa Order No. 4031 monitored by Naval Electronic System Command under
+Contract No. N00039-82-C-0235.
+.ds RH References
+.nr H2 1
+.sp 1
+.SH
+\s+2References\s0
+.LP
+.IP [McKusick83] 20
+M. McKusick, W. Joy, S. Leffler, R. Fabry,
+``A Fast File System for UNIX'',
+\fIACM Transactions on Computer Systems 2\fP, 3.
+pp 181-197, August 1984.
+.ds RH Appendix A
+.bp
diff --git a/share/doc/papers/diskperf/equip.ms b/share/doc/papers/diskperf/equip.ms
new file mode 100644
index 0000000..264ea04
--- /dev/null
+++ b/share/doc/papers/diskperf/equip.ms
@@ -0,0 +1,177 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)equip.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Equipment
+.NH
+Equipment
+.PP
+Various combinations of the three manufacturers' disk controllers,
+and two pairs of Winchester disk drives were tested on both
+VAX 11/780 and VAX 11/750 CPUs. The Emulex and Systems Industries
+disk controllers were interfaced to Fujitsu 2351A
+``Eagle''
+404 Megabyte disk drives.
+The DEC UDA50 disk controller was interfaced to two DEC RA81
+456 Megabyte Winchester disk drives.
+All three controllers were tested on the VAX 780 although
+only the Emulex and DEC controllers were benchmarked on the VAX 11/750.
+Systems Industries makes a VAX 11/750 CMI interface for
+their controller, but we did not have time to test this device.
+In addition, not all the storage systems were tested for
+two drive throughput.
+Each of the controllers and disk drives used in the benchmarks
+is described briefly below.
+.NH 2
+DEC UDA50 disk controller
+.PP
+This is a new controller design which is part of a larger, long range
+storage architecture referred to as
+``DSA''
+or \fBD\fRigital \fBS\fRtorage \fBA\fRrchitecture.
+An important aspect of DSA is migrating a large part
+of the storage management previously handled in the operating
+system to the storage system. Thus, the UDA50 is a much more
+intelligent controller than previous interfaces like the RH750 or
+RH780.
+The UDA50 handles all error correction.
+It also deals with most of the physical storage parameters.
+Typically, system software requests a logical block or
+sequence of blocks.
+The physical locations of these blocks,
+their head, track, and cylinder indices,
+are determined by the controller.
+The UDA50 also orders disk requests to maximize throughput
+where possible, minimizing total seek and rotational delays.
+Where multiple drives are attached to a single controller,
+the UDA50 can interleave
+simultaneous
+data transfers from multiple drives.
+.PP
+The UDA50 is a UNIBUS implementation of a DSA controller.
+It contains 52 sectors of internal buffering to minimize
+the effects of a slow UNIBUS such as the one on the VAX-11/780.
+This buffering also minimizes the effects of contention with
+other UNIBUS peripherals.
+.NH 2
+Emulex SC750/SC780 disk controllers
+.PP
+These two models of the same controller interface to the CMI bus
+of a VAX 11/750 and the SBI bus of a VAX 11/780, respectively.
+To the operating system, they emulate either an RH750 or
+an RH780.
+The controllers install in the
+MASSBUS
+locations in the CPU cabinets and operate from the
+VAX power supplies.
+They provide an
+``SMD''
+or \fBS\fRtorage \fBM\fRodule \fBD\fRrive
+interface to the disk drives.
+Although a large number of disk drives use this interface, we tested
+the controller exclusively connected to Fujitsu 2351A disks.
+.PP
+The controller was first implemented for the VAX-11/750 as the SC750
+model several years ago. Although the SC780 was introduced more
+recently, both are stable products with no bugs known to us.
+.NH 2
+System Industries 9900 disk controller
+.PP
+This controller is an evolution of the S.I. 9400 first introduced
+as a UNIBUS SMD interface.
+The 9900 has been enhanced to include an interface to the VAX 11/780 native
+bus, the SBI.
+It has also been upgraded to operate with higher data rate drives such
+as the Fujitsu 2351As we used in this test.
+The controller is contained in its own rack-mounted drawer with an integral
+power supply.
+The interface to the SMD is a four module set which mounts in a
+CPU cabinet slot normally occupied by an RH780.
+The SBI interface derives power from the VAX CPU cabinet power
+supplies.
+.NH 2
+DEC RA81 disk drives
+.PP
+The RA81 is a rack-mountable 456 Megabyte (formatted) Winchester
+disk drive manufactured by DEC.
+It includes a great deal of technology which is an integral part
+of the DEC \fBDSA\fR scheme.
+The novel technology includes a serial packet based communications
+protocol with the controller over a pair of mini-coaxial cables.
+The physical characteristics of the RA81 are shown in the
+table below:
+.DS
+.TS
+box,center;
+c s
+l l.
+DEC RA81 Disk Drive Characteristics
+_
+Peak Transfer Rate 2.2 Mbytes/sec.
+Rotational Speed 3,600 RPM
+Data Sectors/Track 51
+Logical Cylinders 1,248
+Logical Data Heads 14
+Data Capacity 456 Mbytes
+Minimum Seek Time 6 milliseconds
+Average Seek Time 28 milliseconds
+Maximum Seek Time 52 milliseconds
+.TE
+.DE
+.NH 2
+Fujitsu 2351A disk drives
+.PP
+The Fujitsu 2351A disk drive is a Winchester disk drive
+with an SMD controller interface.
+Fujitsu has developed a very good reputation for
+reliable storage products over the last several years.
+The 2351A has the following physical characteristics:
+.DS
+.TS
+box,center;
+c s
+l l.
+Fujitsu 2351A Disk Drive Characteristics
+_
+Peak Transfer Rate 1.859 Mbytes/sec.
+Rotational Speed 3,961 RPM
+Data Sectors/Track 48
+Cylinders 842
+Data Heads 20
+Data Capacity 404 Mbytes
+Minimum Seek Time 5 milliseconds
+Average Seek Time 18 milliseconds
+Maximum Seek Time 35 milliseconds
+.TE
+.DE
+.ds RH Methodology
+.bp
diff --git a/share/doc/papers/diskperf/methodology.ms b/share/doc/papers/diskperf/methodology.ms
new file mode 100644
index 0000000..703d7b6
--- /dev/null
+++ b/share/doc/papers/diskperf/methodology.ms
@@ -0,0 +1,111 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)methodology.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Methodology
+.NH
+Methodology
+.PP
+Our goal was to evaluate the performance of the target peripherals
+in an environment as much like our 4.2BSD UNIX systems as possible.
+There are two basic approaches to creating this kind of test environment.
+These might be termed the \fIindirect\fR and the \fIdirect\fR approach.
+The approach used by DEC in producing most of the performance data
+on the UDA50/RA81 system under VMS is what we term the indirect
+approach.
+We chose to use the direct approach.
+.PP
+The indirect approach used by DEC involves two steps.
+First, the environment in which performance is to be evaluated
+is parameterized.
+In this case, the disk I/O characteristics of VMS were measured
+as to the distribution of various sizes of accesses and the proportion
+of reads and writes.
+This parameterization of
+typical
+I/O activity was termed a
+``vax mix.''
+The second stage involves simulating this mixture of I/O activities
+with the devices to be tested and noting the total volume of transactions
+processed per unit time by each system.
+.PP
+The problems encountered with this indirect approach often
+have to do with the completeness and correctness of the parameterization
+of the context environment.
+For example, the
+``vax mix''
+model constructed for DEC's tests uses a random distribution of seeks
+to the blocks read or written.
+It is not likely that any real system produces a distribution
+of disk transfer locations which is truly random and does not
+exhibit strong locality characteristics.
+.PP
+The methodology chosen by us is direct
+in the sense that it uses the standard structured file system mechanism present
+in the 4.2BSD UNIX operating system to create the sequence of locations
+and sizes of reads and writes to the benchmarked equipment.
+We simply create, write, and read
+files as they would be by users' activities.
+The disk space allocation and disk caching mechanism built into
+UNIX is used to produce the actual device reads and writes as well
+as to determine their size and location on the disk.
+We measure and compare the rate at which these
+.I
+user files
+.R
+can be written, rewritten, or read.
+.PP
+The advantage of this approach is the implicit accuracy in
+testing in the same environment in which the peripheral
+will be used.
+Although this system does not account for the I/O produced
+by some paging and swapping, in our memory rich environment
+these activities account for a relatively small portion
+of the total disk activity.
+.PP
+A more significant disadvantage to the direct approach
+is the occasional difficulty we have in accounting for our
+measured results.
+The apparently straight-forward activity of reading or writing a logical file
+on disk can produce a complex mixture of disk traffic.
+File I/O is supported by a file management system that
+buffers disk traffic through an internal cache,
+which allows writes to be handled asynchronously.
+Reads must be done synchronously;
+however, this restriction is moderated by the use of read-ahead.
+Small changes in the performance of the disk controller
+subsystem can result in large and unexpected
+changes in the file system performance,
+as it may change the characteristics of the memory contention
+experienced by the processor.
+.ds RH Tests
+.bp
diff --git a/share/doc/papers/diskperf/motivation.ms b/share/doc/papers/diskperf/motivation.ms
new file mode 100644
index 0000000..2884fc5
--- /dev/null
+++ b/share/doc/papers/diskperf/motivation.ms
@@ -0,0 +1,93 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)motivation.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Motivation
+.NH
+Motivation
+.PP
+These benchmarks were performed for several reasons.
+Foremost was our desire to obtain guidelines to aid
+in choosing one of the most expensive components of any
+VAX UNIX configuration, the disk storage system.
+The range of choices in this area has increased dramatically
+in the last year.
+DEC has become, with the introduction of the UDA50/RA81 system,
+cost competitive
+in the area of disk storage for the first time.
+Emulex's entry into the VAX 11/780 SBI controller
+field, the SC780, represented an important choice for us to examine, given
+our previous success with their VAX 11/750 SC750 controller and
+their UNIBUS controllers.
+The Fujitsu 2351A
+Winchester disk drive represents the lowest cost-per-byte disk storage
+known to us.
+In addition, Fujitsu's reputation for reliability was appealing.
+The many attractive aspects of these components justified a more
+careful examination of their performance aspects under UNIX.
+.PP
+In addition to the direct motivation of developing an effective
+choice of storage systems, we hoped to gain more insight into
+VAX UNIX file system and I/O performance in general.
+What generic characteristics of I/O subsystems are most
+important?
+How important is the location of the controller on the SBI/CMI versus
+the UNIBUS?
+Is extensive buffering in the controller essential or even important?
+How much can be gained by putting more of the storage system
+management and optimization function in the controller as
+DEC does with the UDA50?
+.PP
+We also wanted to resolve particular speculation about the value of
+storage system optimization by a controller in a UNIX
+environment.
+Is the access optimization as effective as that already provided
+by the existing 4.2BSD UNIX device handlers for traditional disks?
+VMS disk handlers do no seek optimization.
+This gives the UDA50 controller an advantage over other controllers
+under VMS which is not likely to be as important to UNIX.
+Are there penalties associated with greater intelligence in the controller?
+.PP
+A third and last reason for evaluating this equipment is comparable
+to the proverbial mountain climber's answer when asked why he climbs
+a particular mountain,
+``It was there.''
+In our case the equipment
+was there.
+We were lucky enough to assemble all the desired disks and controllers
+and get them installed on a temporarily idle VAX 11/780.
+This got us started collecting data.
+Although many of the tests were later rerun on a variety of other systems,
+this initial test bed was essential for working out the testing bugs
+and getting our feet wet.
+.ds RH Equipment
+.bp
diff --git a/share/doc/papers/diskperf/results.ms b/share/doc/papers/diskperf/results.ms
new file mode 100644
index 0000000..09f61a8
--- /dev/null
+++ b/share/doc/papers/diskperf/results.ms
@@ -0,0 +1,337 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)results.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Results
+.NH
+Results
+.PP
+The following tables indicate the results of our
+test runs.
+Note that each table contains results for tests run
+on two varieties of 4.2BSD file systems.
+The first set of results is always for a file system
+with a basic blocking factor of eight Kilobytes and a
+fragment size of 1 Kilobyte. The second set of measurements
+is for file systems with a four Kilobyte block size and a
+one Kilobyte fragment size.
+The values in parentheses indicate the percentage of CPU
+time used by the test program.
+In the case of the two disk arm tests,
+the value in parentheses indicates the sum of the percentage
+of the test programs that were run.
+Entries of ``n.m.'' indicate this value was not measured.
+.DS
+.TS
+box,center;
+c s s s s
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+4.2BSD File Systems Tests - \fBVAX 11/750\fR
+=
+Logically Sequential Transfers
+from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Emulex SC750/Eagle UDA50/RA81
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 490 (69%) 620 (96%) 310 (44%) 520 (65%)
+write_4096 380 (99%) 370 (99%) 370 (97%) 360 (98%)
+write_8192 470 (99%) 470 (99%) 320 (71%) 410 (83%)
+rewrite_8192 650 (99%) 620 (99%) 310 (50%) 450 (70%)
+=
+.T&
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+Logically Sequential Transfers
+from \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Emulex SC750/Eagle UDA50/RA81
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 300 (60%) 400 (84%) 210 (42%) 340 (77%)
+write_4096 320 (98%) 320 (98%) 220 (67%) 290 (99%)
+write_8192 340 (98%) 340 (99%) 220 (65%) 310 (98%)
+rewrite_8192 450 (99%) 450 (98%) 230 (47%) 340 (78%)
+.TE
+.DE
+.PP
+Note that the rate of write operations on the VAX 11/750 is ultimately
+CPU limited in some cases.
+The write rates saturate the CPU at a lower bandwidth than the reads
+because they must do disk allocation in addition to moving the data
+from the user program to the disk.
+The UDA50/RA81 saturates the CPU at a lower transfer rate for a given
+operation than the SC750/Eagle because
+it causes more memory contention with the CPU.
+We do not know if this contention is caused by
+the UNIBUS controller or the UDA50.
+.PP
+The following table reports the results of test runs on a VAX 11/780
+with 4 Megabytes of main memory.
+.DS
+.TS
+box,center;
+c s s s s s s
+c s s s s s s
+c s s s s s s
+l | l s | l s | l s
+l | l s | l s | l s
+l | l l | l l | l l
+l | c c | c c | c c.
+4.2BSD File Systems Tests - \fBVAX 11/780\fR
+=
+Logically Sequential Transfers
+from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Emulex SC780/Eagle UDA50/RA81 Sys. Ind. 9900/Eagle
+
+ 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 560 (70%) 480 (58%) 360 (45%) 540 (72%) 340 (41%) 520 (66%)
+write_4096 440 (98%) 440 (98%) 380 (99%) 480 (96%) 490 (96%) 440 (84%)
+write_8192 490 (98%) 490 (98%) 220 (58%)* 480 (92%) 490 (80%) 430 (72%)
+rewrite_8192 760 (100%) 560 (72%) 220 (50%)* 180 (52%)* 490 (60%) 520 (62%)
+=
+.T&
+c s s s s s s
+c s s s s s s
+l | l s | l s | l s
+l | l s | l s | l s
+l | l l | l l | l l
+l | c c | c c | c c.
+Logically Sequential Transfers
+from an \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Emulex SC780/Eagle UDA50/RA81 Sys. Ind. 9900/Eagle
+
+ 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 490 (77%) 370 (66%) n.m. n.m. 200 (31%) 370 (56%)
+write_4096 380 (98%) 370 (98%) n.m. n.m. 200 (46%) 370 (88%)
+write_8192 380 (99%) 370 (97%) n.m. n.m. 200 (45%) 320 (76%)
+rewrite_8192 490 (87%) 350 (66%) n.m. n.m. 200 (31%) 300 (46%)
+.TE
+* the operation of the hardware was suspect during these tests.
+.DE
+.PP
+The dropoff in reading and writing rates for the two drive SC780/Eagle
+tests is probably due to the file system using insufficient
+rotational delay for these tests.
+We have not fully investigated these times.
+.PP
+The following table compares data rates on VAX 11/750s directly
+with those of VAX 11/780s using the UDA50/RA81 storage system.
+.DS
+.TS
+box,center;
+c s s s s
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+4.2BSD File Systems Tests - \fBDEC UDA50 - 750 vs. 780\fR
+=
+Logically Sequential Transfers
+from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test VAX 11/750 UNIBUS VAX 11/780 UNIBUS
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 310 (44%) 520 (84%) 360 (45%) 540 (72%)
+write_4096 370 (97%) 360 (100%) 380 (99%) 480 (96%)
+write_8192 320 (71%) 410 (96%) 220 (58%)* 480 (92%)
+rewrite_8192 310 (50%) 450 (80%) 220 (50%)* 180 (52%)*
+=
+.T&
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+Logically Sequential Transfers
+from an \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test VAX 11/750 UNIBUS VAX 11/780 UNIBUS
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 210 (42%) 342 (77%) n.m. n.m.
+write_4096 215 (67%) 294 (99%) n.m. n.m.
+write_8192 215 (65%) 305 (98%) n.m. n.m.
+rewrite_8192 227 (47%) 336 (78%) n.m. n.m.
+.TE
+* the operation of the hardware was suspect during these tests.
+.DE
+.PP
+The higher throughput available on VAX 11/780s is due to a number
+of factors.
+The larger main memory size allows a larger file system cache.
+The block allocation routines run faster, raising the upper limit
+on the data rates in writing new files.
+.PP
+The next table makes the same comparison using an Emulex controller
+on both systems.
+.DS
+.TS
+box, center;
+c s s s s
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+4.2BSD File Systems Tests - \fBEmulex - 750 vs. 780\fR
+=
+Logically Sequential Transfers
+from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test VAX 11/750 CMI Bus VAX 11/780 SBI Bus
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 490 (69%) 620 (96%) 560 (70%) 480 (58%)
+write_4096 380 (99%) 370 (99%) 440 (98%) 440 (98%)
+write_8192 470 (99%) 470 (99%) 490 (98%) 490 (98%)
+rewrite_8192 650 (99%) 620 (99%) 760 (100%) 560 (72%)
+=
+.T&
+c s s s s
+c s s s s
+l | l s | l s
+l | l s | l s
+l | l l | l l
+l | c c | c c.
+Logically Sequential Transfers
+from an \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test VAX 11/750 CMI Bus VAX 11/780 SBI Bus
+
+ 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 300 (60%) 400 (84%) 490 (77%) 370 (66%)
+write_4096 320 (98%) 320 (98%) 380 (98%) 370 (98%)
+write_8192 340 (98%) 340 (99%) 380 (99%) 370 (97%)
+rewrite_8192 450 (99%) 450 (98%) 490 (87%) 350 (66%)
+.TE
+.DE
+.PP
+The following table illustrates the evolution of our testing
+process as both hardware and software problems affecting
+the performance of the Emulex SC780 were corrected.
+The software change was suggested to us by George Goble
+of Purdue University.
+.PP
+The 4.2BSD handler for RH750/RH780 interfaced disk drives
+contains several constants which determine how
+much time is provided between an interrupt signaling the completion
+of a positioning command and the subsequent start of a data transfer
+operation. These lead times are expressed as sectors of rotational delay.
+If they are too small, an extra complete rotation will often be required
+between a seek and subsequent read or write operation.
+The higher bit rate and rotational speed of the 2351A Fujitsu
+disk drives required
+increasing these constants.
+.PP
+The hardware change involved allowing for slightly longer
+delays in arbitrating for cycles on the SBI bus by
+starting the bus arbitration cycle a little further ahead of
+when the data was ready for transfer.
+Finally we had to increase the rotational delay between consecutive
+blocks in the file because
+the higher bandwidth from the disk generated more memory contention,
+which slowed down the processor.
+.DS
+.TS
+box,center,expand;
+c s s s s s s
+c s s s s s s
+c s s s s s s
+l | l s | l s | l s
+l | l s | l s | l s
+l | l s | l s | l s
+l | c c | c c | c c
+l | c c | c c | c c.
+4.2BSD File Systems Tests - \fBEmulex SC780 Disk Controller Evolution\fR
+=
+Logically Sequential Transfers
+from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Inadequate Search Lead OK Search Lead OK Search Lead
+ Initial SBI Arbitration Init SBI Arb. Improved SBI Arb.
+
+ 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 320 370 440 (60%) n.m. 560 (70%) 480 (58%)
+write_4096 250 270 300 (63%) n.m. 440 (98%) 440 (98%)
+write_8192 250 280 340 (60%) n.m. 490 (98%) 490 (98%)
+rewrite_8192 250 290 380 (48%) n.m. 760 (100%) 560 (72%)
+=
+.T&
+c s s s s s s
+c s s s s s s
+l | l s | l s | l s
+l | l s | l s | l s
+l | l s | l s | l s
+l | c c | c c | c c
+l | c c | c c | c c.
+Logically Sequential Transfers
+from an \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
+_
+Test Inadequate Search Lead OK Search Lead OK Search Lead
+ Initial SBI Arbitration Init SBI Arb. Improved SBI Arb.
+
+ 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
+_
+read_8192 200 220 280 n.m. 490 (77%) 370 (66%)
+write_4096 180 190 300 n.m. 380 (98%) 370 (98%)
+write_8192 180 200 320 n.m. 380 (99%) 370 (97%)
+rewrite_8192 190 200 340 n.m. 490 (87%) 350 (66%)
+.TE
+.DE
+.ds RH Conclusions
+.bp
diff --git a/share/doc/papers/diskperf/tests.ms b/share/doc/papers/diskperf/tests.ms
new file mode 100644
index 0000000..1809afe
--- /dev/null
+++ b/share/doc/papers/diskperf/tests.ms
@@ -0,0 +1,108 @@
+.\" Copyright (c) 1983 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)tests.ms 6.2 (Berkeley) 4/16/91
+.\"
+.ds RH Tests
+.NH
+Tests
+.PP
+Our battery of tests consists of four programs,
+read_8192, write_8192, write_4096
+and rewrite_8192 originally written by [McKusick83]
+to evaluate the performance of the new file system in 4.2BSD.
+These programs all follow the same model and are typified by
+read_8192 shown here.
+.DS
+#define BUFSIZ 8192
+main( argc, argv)
+char **argv;
+{
+ char buf[BUFSIZ];
+ int i, j;
+
+ j = open(argv[1], 0);
+ for (i = 0; i < 1024; i++)
+ read(j, buf, BUFSIZ);
+}
+.DE
+The remaining programs are included in appendix A.
+.PP
+These programs read, write with two different blocking factors,
+and rewrite logical files in a structured file system on the disk
+under test.
+The write programs create new files while the rewrite program
+overwrites an existing file.
+Each of these programs represents an important segment of the
+typical UNIX file system activity with the read program
+representing by far the largest class and the rewrite the smallest.
+.PP
+A blocking factor of 8192 is used by all programs except write_4096.
+This is typical of most 4.2BSD user programs since a standard set of
+I/O support routines is commonly used and these routines buffer
+data in similar block sizes.
+.PP
+For each test run, an empty eight Kilobyte block
+file system was created in the target
+storage system.
+Then each of the four tests was run and timed.
+Each test was run three times;
+the first to clear out any useful data in the cache,
+and the second two to insure that the experiment
+had stabilized and was repeatable.
+Each test operated on eight Megabytes of data to
+insure that the cache did not overly influence the results.
+Another file system was then initialized using a
+basic blocking factor of four Kilobytes and the same tests
+were run again and timed.
+A command script for a run appears as follows:
+.DS
+#!/bin/csh
+set time=2
+echo "8K/1K file system"
+newfs /dev/rhp0g eagle
+mount /dev/hp0g /mnt0
+mkdir /mnt0/foo
+echo "write_8192 /mnt0/foo/tst2"
+rm -f /mnt0/foo/tst2
+write_8192 /mnt0/foo/tst2
+rm -f /mnt0/foo/tst2
+write_8192 /mnt0/foo/tst2
+rm -f /mnt0/foo/tst2
+write_8192 /mnt0/foo/tst2
+echo "read_8192 /mnt0/foo/tst2"
+read_8192 /mnt0/foo/tst2
+read_8192 /mnt0/foo/tst2
+read_8192 /mnt0/foo/tst2
+umount /dev/hp0g
+.DE
+.ds RH Results
+.bp
diff --git a/share/doc/papers/fsinterface/Makefile b/share/doc/papers/fsinterface/Makefile
new file mode 100644
index 0000000..cb1d913
--- /dev/null
+++ b/share/doc/papers/fsinterface/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 5.3 (Berkeley) 6/8/93
+
+DIR= papers/fsinterface
+SRCS= fsinterface.ms
+MACROS= -ms
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/fsinterface/abstract.ms b/share/doc/papers/fsinterface/abstract.ms
new file mode 100644
index 0000000..ab8b473
--- /dev/null
+++ b/share/doc/papers/fsinterface/abstract.ms
@@ -0,0 +1,73 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)abstract.ms 5.2 (Berkeley) 4/16/91
+.\"
+.TL
+Toward a Compatible Filesystem Interface
+.AU
+Michael J. Karels
+Marshall Kirk McKusick
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.LP
+As network or remote filesystems have been implemented for
+.UX ,
+several stylized interfaces between the filesystem implementation
+and the rest of the kernel have been developed.
+Notable among these are Sun Microsystems' virtual filesystem interface
+using vnodes, Digital Equipment's Generic File System architecture,
+and AT&T's File System Switch.
+Each design attempts to isolate filesystem-dependent details
+below the generic interface and to provide a framework within which
+new filesystems may be incorporated.
+However, each of these interfaces is different from
+and incompatible with the others.
+Each of them addresses somewhat different design goals.
+Each was based upon a different starting version of
+.UX ,
+targeted a different set of filesystems with varying characteristics,
+and uses a different set of primitive operations provided by the filesystem.
+The current study compares the various filesystem interfaces.
+Criteria for comparison include generality, completeness, robustness,
+efficiency and esthetics.
+As a result of this comparison, a proposal for a new filesystem interface
+is advanced that includes the best features of the existing implementations.
+The proposal adopts the calling convention for name lookup introduced
+in 4.3BSD.
+A prototype implementation is described.
+This proposal and the rationale underlying its development
+have been presented to major software vendors
+as an early step toward convergence upon a compatible filesystem interface.
diff --git a/share/doc/papers/fsinterface/fsinterface.ms b/share/doc/papers/fsinterface/fsinterface.ms
new file mode 100644
index 0000000..c5722e6
--- /dev/null
+++ b/share/doc/papers/fsinterface/fsinterface.ms
@@ -0,0 +1,1176 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fsinterface.ms 1.4 (Berkeley) 4/16/91
+.\"
+.if \nv .rm CM
+.de UX
+.ie \\n(UX \s-1UNIX\s0\\$1
+.el \{\
+\s-1UNIX\s0\\$1\(dg
+.FS
+\(dg \s-1UNIX\s0 is a registered trademark of AT&T.
+.FE
+.nr UX 1
+.\}
+..
+.TL
+Toward a Compatible Filesystem Interface
+.AU
+Michael J. Karels
+Marshall Kirk McKusick
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+.LP
+As network or remote filesystems have been implemented for
+.UX ,
+several stylized interfaces between the filesystem implementation
+and the rest of the kernel have been developed.
+.FS
+This is an update of a paper originally presented
+at the September 1986 conference of the European
+.UX
+Users' Group.
+Last modified April 16, 1991.
+.FE
+Notable among these are Sun Microsystems' Virtual Filesystem interface (VFS)
+using vnodes, Digital Equipment's Generic File System (GFS) architecture,
+and AT&T's File System Switch (FSS).
+Each design attempts to isolate filesystem-dependent details
+below a generic interface and to provide a framework within which
+new filesystems may be incorporated.
+However, each of these interfaces is different from
+and incompatible with the others.
+Each of them addresses somewhat different design goals.
+Each was based on a different starting version of
+.UX ,
+targeted a different set of filesystems with varying characteristics,
+and uses a different set of primitive operations provided by the filesystem.
+The current study compares the various filesystem interfaces.
+Criteria for comparison include generality, completeness, robustness,
+efficiency and esthetics.
+Several of the underlying design issues are examined in detail.
+As a result of this comparison, a proposal for a new filesystem interface
+is advanced that includes the best features of the existing implementations.
+The proposal adopts the calling convention for name lookup introduced
+in 4.3BSD, but is otherwise closely related to Sun's VFS.
+A prototype implementation is now being developed at Berkeley.
+This proposal and the rationale underlying its development
+have been presented to major software vendors
+as an early step toward convergence on a compatible filesystem interface.
+.AE
+.SH
+Introduction
+.PP
+As network communications and workstation environments
+became common elements in
+.UX
+systems, several vendors of
+.UX
+systems have designed and built network file systems
+that allow client processes on one
+.UX
+machine to access files on a server machine.
+Examples include Sun's Network File System, NFS [Sandberg85],
+AT&T's recently-announced Remote File Sharing, RFS [Rifkin86],
+the LOCUS distributed filesystem [Walker85],
+and Masscomp's extended filesystem [Cole85].
+Other remote filesystems have been implemented in research or university groups
+for internal use, notably the network filesystem in the Eighth Edition
+.UX
+system [Weinberger84] and two different filesystems used at Carnegie-Mellon
+University [Satyanarayanan85].
+Numerous other remote file access methods have been devised for use
+within individual
+.UX
+processes,
+many of them by modifications to the C I/O library
+similar to those in the Newcastle Connection [Brownbridge82].
+.PP
+Multiple network filesystems may frequently
+be found in use within a single organization.
+These circumstances make it highly desirable to be able to transport filesystem
+implementations from one system to another.
+Such portability is considerably enhanced by the use of a stylized interface
+with carefully-defined entry points to separate the filesystem from the rest
+of the operating system.
+This interface should be similar to the interface between device drivers
+and the kernel.
+Although varying somewhat among the common versions of
+.UX ,
+the device driver interfaces are sufficiently similar that device drivers
+may be moved from one system to another without major problems.
+A clean, well-defined interface to the filesystem also allows a single
+system to support multiple local filesystem types.
+.PP
+For reasons such as these, several filesystem interfaces have been used
+when integrating new filesystems into the system.
+The best-known of these are Sun Microsystems' Virtual File System interface,
+VFS [Kleiman86], and AT&T's File System Switch, FSS.
+Another interface, known as the Generic File System, GFS,
+has been implemented for the ULTRIX\(dd
+.FS
+\(dd ULTRIX is a trademark of Digital Equipment Corp.
+.FE
+system by Digital [Rodriguez86].
+There are numerous differences among these designs.
+The differences may be understood from the varying philosophies
+and design goals of the groups involved, from the systems under which
+the implementations were done, and from the filesystems originally targeted
+by the designs.
+These differences are summarized in the following sections
+within the limitations of the published specifications.
+.SH
+Design goals
+.PP
+There are several design goals which, in varying degrees,
+have driven the various designs.
+Each attempts to divide the filesystem into a filesystem-type-independent
+layer and individual filesystem implementations.
+The division between these layers occurs at somewhat different places
+in these systems, reflecting different views of the diversity and types
+of the filesystems that may be accommodated.
+Compatibility with existing local filesystems has varying importance;
+at the user-process level, each attempts to be completely transparent
+except for a few filesystem-related system management programs.
+The AT&T interface also makes a major effort to retain familiar internal
+system interfaces, and even to retain object-file-level binary compatibility
+with operating system modules such as device drivers.
+Both Sun and DEC were willing to change internal data structures and interfaces
+so that other operating system modules might require recompilation
+or source-code modification.
+.PP
+AT&T's interface both allows and requires filesystems to support the full
+and exact semantics of their previous filesystem,
+including interruptions of system calls on slow operations.
+System calls that deal with remote files are encapsulated
+with their environment and sent to a server where execution continues.
+The system call may be aborted by either client or server, returning
+control to the client.
+Most system calls that descend into the filesystem-dependent layer
+of a filesystem other than the standard local filesystem do not return
+to the higher-level kernel calling routines.
+Instead, the filesystem-dependent code completes the requested
+operation and then executes a non-local goto (\fIlongjmp\fP) to exit the
+system call.
+These efforts to avoid modification of main-line kernel code
+indicate a far greater emphasis on internal compatibility than on modularity,
+clean design, or efficiency.
+.PP
+In contrast, the Sun VFS interface makes major modifications to the internal
+interfaces in the kernel, with a very clear separation
+of filesystem-independent and -dependent data structures and operations.
+The semantics of the filesystem are largely retained for local operations,
+although this is achieved at some expense where it does not fit the internal
+structuring well.
+The filesystem implementations are not required to support the same
+semantics as local
+.UX
+filesystems.
+Several historical features of
+.UX
+filesystem behavior are difficult to achieve using the VFS interface,
+including the atomicity of file and link creation and the use of open files
+whose names have been removed.
+.PP
+A major design objective of Sun's network filesystem,
+statelessness,
+permeates the VFS interface.
+No locking may be done in the filesystem-independent layer,
+and locking in the filesystem-dependent layer may occur only during
+a single call into that layer.
+.PP
+A final design goal of most implementors is performance.
+For remote filesystems,
+this goal tends to be in conflict with the goals of complete semantic
+consistency, compatibility and modularity.
+Sun has chosen performance over modularity in some areas,
+but has emphasized clean separation of the layers within the filesystem
+at the expense of performance.
+Although the performance of RFS is yet to be seen,
+AT&T seems to have considered compatibility far more important than modularity
+or performance.
+.SH
+Differences among filesystem interfaces
+.PP
+The existing filesystem interfaces may be characterized
+in several ways.
+Each system is centered around a few data structures or objects,
+along with a set of primitives for performing operations upon these objects.
+In the original
+.UX
+filesystem [Ritchie74],
+the basic object used by the filesystem is the inode, or index node.
+The inode contains all of the information about a file except its name:
+its type, identification, ownership, permissions, timestamps and location.
+Inodes are identified by the filesystem device number and the index within
+the filesystem.
+The major entry points to the filesystem are \fInamei\fP,
+which translates a filesystem pathname into the underlying inode,
+and \fIiget\fP, which locates an inode by number and installs it in the in-core
+inode table.
+\fINamei\fP performs name translation by iterative lookup
+of each component name in its directory to find its inumber,
+then using \fIiget\fP to return the actual inode.
+If the last component has been reached, this inode is returned;
+otherwise, the inode describes the next directory to be searched.
+The inode returned may be used in various ways by the caller;
+it may be examined, the file may be read or written,
+types and access may be checked, and fields may be modified.
+Modified inodes are automatically written back to the filesystem
+on disk when the last reference is released with \fIiput\fP.
+Although the details are considerably different,
+the same general scheme is used in the faster filesystem in 4.2BSD
+.UX
+[McKusick85].
+.PP
+Both the AT&T interface and, to a lesser extent, the DEC interface
+attempt to preserve the inode-oriented interface.
+Each modifies the inode to allow different varieties of the structure
+for different filesystem types by separating the filesystem-dependent
+parts of the inode into a separate structure or one arm of a union.
+Both interfaces allow operations
+equivalent to the \fInamei\fP and \fIiget\fP operations
+of the old filesystem to be performed in the filesystem-independent
+layer, with entry points to the individual filesystem implementations to support
+the type-specific parts of these operations. Implicit in this interface
+is that files may conveniently be named by and located using a single
+index within a filesystem.
+The GFS provides specific entry points to the filesystems
+to change most file properties rather than allowing arbitrary changes
+to be made to the generic part of the inode.
+.PP
+In contrast, the Sun VFS interface replaces the inode as the primary object
+with the vnode.
+The vnode contains no filesystem-dependent fields except the pointer
+to the set of operations implemented by the filesystem.
+Properties of a vnode that might be transient, such as the ownership,
+permissions, size and timestamps, are maintained by the lower layer.
+These properties may be presented in a generic format upon request;
+callers are expected not to hold this information for any length of time,
+as it may not be up-to-date later on.
+The vnode operations do not include a counterpart for \fIiget\fP;
+the only external interface for obtaining vnodes for specific files
+is the name lookup operation.
+(Separate procedures are provided outside of this interface
+that obtain a ``file handle'' for a vnode which may be given
+to a client by a server, such that the vnode may be retrieved
+upon later presentation of the file handle.)
+.SH
+Name translation issues
+.PP
+Each of the systems described includes a mechanism for performing
+pathname-to-internal-representation translation.
+The style of the name translation function is very different in all
+three systems.
+As described above, the AT&T and DEC systems retain the \fInamei\fP function.
+The two are quite different, however, as the ULTRIX interface uses
+the \fInamei\fP calling convention introduced in 4.3BSD.
+The parameters and context for the name lookup operation
+are collected in a \fInameidata\fP structure which is passed to \fInamei\fP
+for operation.
+Intent to create or delete the named file is declared in advance,
+so that the final directory scan in \fInamei\fP may retain information
+such as the offset in the directory at which the modification will be made.
+Filesystems that use such mechanisms to avoid redundant work
+must therefore lock the directory to be modified so that it may not
+be modified by another process before completion.
+In the System V filesystem, as in previous versions of
+.UX ,
+this information is stored in the per-process \fIuser\fP structure
+by \fInamei\fP for use by a low-level routine called after performing
+the actual creation or deletion of the file itself.
+In 4.3BSD and in the GFS interface, these side effects of \fInamei\fP
+are stored in the \fInameidata\fP structure given as argument to \fInamei\fP,
+which is also presented to the routine implementing file creation or deletion.
+.PP
+The ULTRIX \fInamei\fP routine is responsible for the generic
+parts of the name translation process, such as copying the name into
+an internal buffer, validating it, interpolating
+the contents of symbolic links, and indirecting at mount points.
+As in 4.3BSD, the name is copied into the buffer in a single call,
+according to the location of the name.
+After determining the type of the filesystem at the start of translation
+(the current directory or root directory), it calls the filesystem's
+\fInamei\fP entry with the same structure it received from its caller.
+The filesystem-specific routine translates the name, component by component,
+as long as no mount points are reached.
+It may return after any number of components have been processed.
+\fINamei\fP performs any processing at mount points, then calls
+the correct translation routine for the next filesystem.
+Network filesystems may pass the remaining pathname to a server for translation,
+or they may look up the pathname components one at a time.
+The former strategy would be more efficient,
+but the latter scheme allows mount points within a remote filesystem
+without server knowledge of all client mounts.
+.PP
+The AT&T \fInamei\fP interface is presumably the same as that in previous
+.UX
+systems, accepting the name of a routine to fetch pathname characters
+and an operation (one of: lookup, lookup for creation, or lookup for deletion).
+It translates, component by component, as before.
+If it detects that a mount point crosses to a remote filesystem,
+it passes the remainder of the pathname to the remote server.
+A pathname-oriented request other than open may be completed
+within the \fInamei\fP call,
+avoiding return to the (unmodified) system call handler
+that called \fInamei\fP.
+.PP
+In contrast to the first two systems, Sun's VFS interface has replaced
+\fInamei\fP with \fIlookupname\fP.
+This routine simply calls a new pathname-handling module to allocate
+a pathname buffer and copy in the pathname (copying a character per call),
+then calls \fIlookuppn\fP.
+\fILookuppn\fP performs the iteration over the directories leading
+to the destination file; it copies each pathname component to a local buffer,
+then calls the filesystem \fIlookup\fP entry to locate the vnode
+for that file in the current directory.
+Per-filesystem \fIlookup\fP routines may translate only one component
+per call.
+For creation and deletion of new files, the lookup operation is unmodified;
+the lookup of the final component only serves to check for the existence
+of the file.
+The subsequent creation or deletion call, if any, must repeat the final
+name translation and associated directory scan.
+For new file creation in particular, this is rather inefficient,
+as file creation requires two complete scans of the directory.
+.PP
+Several of the important performance improvements in 4.3BSD
+were related to the name translation process [McKusick85][Leffler84].
+The following changes were made:
+.IP 1. 4
+A system-wide cache of recent translations is maintained.
+The cache is separate from the inode cache, so that multiple names
+for a file may be present in the cache.
+The cache does not hold ``hard'' references to the inodes,
+so that the normal reference pattern is not disturbed.
+.IP 2.
+A per-process cache is kept of the directory and offset
+at which the last successful name lookup was done.
+This allows sequential lookups of all the entries in a directory to be done
+in linear time.
+.IP 3.
+The entire pathname is copied into a kernel buffer in a single operation,
+rather than using two subroutine calls per character.
+.IP 4.
+A pool of pathname buffers is held by \fInamei\fP, avoiding allocation
+overhead.
+.LP
+All of these performance improvements from 4.3BSD are well worth using
+within a more generalized filesystem framework.
+The generalization of the structure may otherwise make an already-expensive
+function even more costly.
+Most of these improvements are present in the GFS system, as it derives
+from the beta-test version of 4.3BSD.
+The Sun system uses a name-translation cache generally like that in 4.3BSD.
+The name cache is a filesystem-independent facility provided for the use
+of the filesystem-specific lookup routines.
+The Sun cache, like that first used at Berkeley but unlike that in 4.3,
+holds a ``hard'' reference to the vnode (increments the reference count).
+The ``soft'' reference scheme in 4.3BSD cannot be used with the current
+NFS implementation, as NFS allocates vnodes dynamically and frees them
+when the reference count returns to zero rather than caching them.
+As a result, fewer names may be held in the cache
+than (local filesystem) vnodes, and the cache distorts the normal reference
+patterns otherwise seen by the LRU cache.
+As the name cache references overflow the local filesystem inode table,
+the name cache must be purged to make room in the inode table.
+Also, to determine whether a vnode is in use (for example,
+before mounting upon it), the cache must be flushed to free any
+cache reference.
+These problems should be corrected
+by the use of the soft cache reference scheme.
+.PP
+A final observation on the efficiency of name translation in the current
+Sun VFS architecture is that the number of subroutine calls used
+by a multi-component name lookup is dramatically larger
+than in the other systems.
+The name lookup scheme in GFS suffers from this problem much less,
+at no expense in violation of layering.
+.PP
+A final problem to be considered is synchronization and consistency.
+As the filesystem operations are more stylized and broken into separate
+entry points for parts of operations, it is more difficult to guarantee
+consistency throughout an operation and/or to synchronize with other
+processes using the same filesystem objects.
+The Sun interface suffers most severely from this,
+as it forbids the filesystems from locking objects across calls
+to the filesystem.
+It is possible that a file may be created between the time that a lookup
+is performed and a subsequent creation is requested.
+Perhaps more strangely, after a lookup fails to find the target
+of a creation attempt, the actual creation might find that the target
+now exists and is a symbolic link.
+The call will either fail unexpectedly, as the target is of the wrong type,
+or the generic creation routine will have to note the error
+and restart the operation from the lookup.
+This problem will always exist in a stateless filesystem,
+but the VFS interface forces all filesystems to share the problem.
+This restriction against locking between calls also
+forces duplication of work during file creation and deletion.
+This is considered unacceptable.
+.SH
+Support facilities and other interactions
+.PP
+Several support facilities are used by the current
+.UX
+filesystem and require generalization for use by other filesystem types.
+For filesystem implementations to be portable,
+it is desirable that these modified support facilities
+should also have a uniform interface and
+behave in a consistent manner in target systems.
+A prominent example is the filesystem buffer cache.
+The buffer cache in a standard (System V or 4.3BSD)
+.UX
+system contains physical disk blocks with no reference to the files containing
+them.
+This works well for the local filesystem, but has obvious problems
+for remote filesystems.
+Sun has modified the buffer cache routines to describe buffers by vnode
+rather than by device.
+For remote files, the vnode used is that of the file, and the block
+numbers are virtual data blocks.
+For local filesystems, a vnode for the block device is used for cache reference,
+and the block numbers are filesystem physical blocks.
+Use of per-file cache description does not easily accommodate
+caching of indirect blocks, inode blocks, superblocks or cylinder group blocks.
+However, the vnode describing the block device for the cache
+is one created internally,
+rather than the vnode for the device looked up when mounting,
+and it is located by searching a private list of vnodes
+rather than by holding it in the mount structure.
+Although the Sun modification makes it possible to use the buffer
+cache for data blocks of remote files, a better generalization
+of the buffer cache is needed.
+.PP
+The RFS filesystem used by AT&T does not currently cache data blocks
+on client systems, thus the buffer cache is probably unmodified.
+The form of the buffer cache in ULTRIX is unknown to us.
+.PP
+Another subsystem that has a large interaction with the filesystem
+is the virtual memory system.
+The virtual memory system must read data from the filesystem
+to satisfy fill-on-demand page faults.
+For efficiency, this read call is arranged to place the data directly
+into the physical pages assigned to the process (a ``raw'' read) to avoid
+copying the data.
+Although the read operation normally bypasses the filesystem buffer cache,
+consistency must be maintained by checking the buffer cache and copying
+or flushing modified data not yet stored on disk.
+The 4.2BSD virtual memory system, like that of Sun and ULTRIX,
+maintains its own cache of reusable text pages.
+This creates additional complications.
+As the virtual memory systems are redesigned, these problems should be
+resolved by reading through the buffer cache, then mapping the cached
+data into the user address space.
+If the buffer cache or the process pages are changed while the other reference
+remains, the data would have to be copied (``copy-on-write'').
+.PP
+In the meantime, the current virtual memory systems must be used
+with the new filesystem framework.
+Both the Sun and AT&T filesystem interfaces
+provide entry points to the filesystem for optimization of the virtual
+memory system by performing logical-to-physical block number translation
+when setting up a fill-on-demand image for a process.
+The VFS provides a vnode operation analogous to the \fIbmap\fP function of the
+.UX
+filesystem.
+Given a vnode and logical block number, it returns a vnode and block number
+which may be read to obtain the data.
+If the filesystem is local, it returns the private vnode for the block device
+and the physical block number.
+As the \fIbmap\fP operations are all performed at one time, during process
+startup, any indirect blocks for the file will remain in the cache
+after they are once read.
+In addition, the interface provides a \fIstrategy\fP entry that may be used
+for ``raw'' reads from a filesystem device,
+used to read data blocks into an address space without copying.
+This entry uses a buffer header (\fIbuf\fP structure)
+to describe the I/O operation
+instead of a \fIuio\fP structure.
+The buffer-style interface is the same as that used by disk drivers internally.
+This difference allows the current \fIuio\fP primitives to be avoided,
+as they copy all data to/from the current user process address space.
+Instead, for local filesystems these operations could be done internally
+with the standard raw disk read routines,
+which use a \fIuio\fP interface.
+When loading from a remote filesystem,
+the data will be received in a network buffer.
+If network buffers are suitably aligned,
+the data may be mapped into the process address space by a page swap
+without copying.
+In either case, it should be possible to use the standard filesystem
+read entry from the virtual memory system.
+.PP
+Other issues that must be considered in devising a portable
+filesystem implementation include kernel memory allocation,
+the implicit use of user-structure global context,
+which may create problems with reentrancy,
+the style of the system call interface,
+and the conventions for synchronization
+(sleep/wakeup, handling of interrupted system calls, semaphores).
+.SH
+The Berkeley Proposal
+.PP
+The Sun VFS interface has been the most widely used of the three described here.
+It is also the most general of the three, in that filesystem-specific
+data and operations are best separated from the generic layer.
+Although it has several disadvantages which were described above,
+most of them may be corrected with minor changes to the interface
+(and, in a few areas, philosophical changes).
+The DEC GFS has other advantages, in particular the use of the 4.3BSD
+\fInamei\fP interface and optimizations.
+It allows single or multiple components of a pathname
+to be translated in a single call to the specific filesystem
+and thus accommodates filesystems with either preference.
+The FSS is least well understood, as there is little public information
+about the interface.
+However, the design goals are the least consistent with those of the Berkeley
+research groups.
+Accordingly, a new filesystem interface has been devised to avoid
+some of the problems in the other systems.
+The proposed interface derives directly from Sun's VFS,
+but, like GFS, uses a 4.3BSD-style name lookup interface.
+Additional context information has been moved from the \fIuser\fP structure
+to the \fInameidata\fP structure so that name translation may be independent
+of the global context of a user process.
+This is especially desired in any system where kernel-mode servers
+operate as light-weight or interrupt-level processes,
+or where a server may store or cache context for several clients.
+This calling interface has the additional advantage
+that the call parameters need not all be pushed onto the stack for each call
+through the filesystem interface,
+and they may be accessed using short offsets from a base pointer
+(unlike global variables in the \fIuser\fP structure).
+.PP
+The proposed filesystem interface is described very tersely here.
+For the most part, data structures and procedures are analogous
+to those used by VFS, and only the changes will be treated here.
+See [Kleiman86] for complete descriptions of the vfs and vnode operations
+in Sun's interface.
+.PP
+The central data structure for name translation is the \fInameidata\fP
+structure.
+The same structure is used to pass parameters to \fInamei\fP,
+to pass these same parameters to filesystem-specific lookup routines,
+to communicate completion status from the lookup routines back to \fInamei\fP,
+and to return completion status to the calling routine.
+For creation or deletion requests, the parameters to the filesystem operation
+to complete the request are also passed in this same structure.
+The form of the \fInameidata\fP structure is:
+.br
+.ne 2i
+.ID
+.nf
+.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+/*
+ * Encapsulation of namei parameters.
+ * One of these is located in the u. area to
+ * minimize space allocated on the kernel stack
+ * and to retain per-process context.
+ */
+struct nameidata {
+ /* arguments to namei and related context: */
+ caddr_t ni_dirp; /* pathname pointer */
+ enum uio_seg ni_seg; /* location of pathname */
+ short ni_nameiop; /* see below */
+ struct vnode *ni_cdir; /* current directory */
+ struct vnode *ni_rdir; /* root directory, if not normal root */
+ struct ucred *ni_cred; /* credentials */
+
+ /* shared between namei, lookup routines and commit routines: */
+ caddr_t ni_pnbuf; /* pathname buffer */
+ char *ni_ptr; /* current location in pathname */
+ int ni_pathlen; /* remaining chars in path */
+ short ni_more; /* more left to translate in pathname */
+ short ni_loopcnt; /* count of symlinks encountered */
+
+ /* results: */
+ struct vnode *ni_vp; /* vnode of result */
+ struct vnode *ni_dvp; /* vnode of intermediate directory */
+
+/* BEGIN UFS SPECIFIC */
+ struct diroffcache { /* last successful directory search */
+ struct vnode *nc_prevdir; /* terminal directory */
+ long nc_id; /* directory's unique id */
+ off_t nc_prevoffset; /* where last entry found */
+ } ni_nc;
+/* END UFS SPECIFIC */
+};
+.DE
+.DS
+.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
+/*
+ * namei operations and modifiers
+ */
+#define LOOKUP 0 /* perform name lookup only */
+#define CREATE 1 /* setup for file creation */
+#define DELETE 2 /* setup for file deletion */
+#define WANTPARENT 0x10 /* return parent directory vnode also */
+#define NOCACHE 0x20 /* name must not be left in cache */
+#define FOLLOW 0x40 /* follow symbolic links */
+#define NOFOLLOW 0x0 /* don't follow symbolic links (pseudo) */
+.DE
+As in current systems other than Sun's VFS, \fInamei\fP is called
+with an operation request, one of LOOKUP, CREATE or DELETE.
+For a LOOKUP, the operation is exactly like the lookup in VFS.
+CREATE and DELETE allow the filesystem to ensure consistency
+by locking the parent inode (private to the filesystem),
+and (for the local filesystem) to avoid duplicate directory scans
+by storing the new directory entry and its offset in the directory
+in the \fIndirinfo\fP structure.
+This is intended to be opaque to the filesystem-independent levels.
+Not all lookups for creation or deletion are actually followed
+by the intended operation; permission may be denied, the filesystem
+may be read-only, etc.
+Therefore, an entry point to the filesystem is provided
+to abort a creation or deletion operation
+and allow release of any locked internal data.
+After a \fInamei\fP with a CREATE or DELETE flag, the pathname pointer
+is set to point to the last filename component.
+Filesystems that choose to implement creation or deletion entirely
+within the subsequent call to a create or delete entry
+are thus free to do so.
+.PP
+The \fInameidata\fP is used to store context used during name translation.
+The current and root directories for the translation are stored here.
+For the local filesystem, the per-process directory offset cache
+is also kept here.
+A file server could leave the directory offset cache empty,
+could use a single cache for all clients,
+or could hold caches for several recent clients.
+.PP
+Several other data structures are used in the filesystem operations.
+One is the \fIucred\fP structure which describes a client's credentials
+to the filesystem.
+This is modified slightly from the Sun structure;
+the ``accounting'' group ID has been merged into the groups array.
+The actual number of groups in the array is given explicitly
+to avoid use of a reserved group ID as a terminator.
+Also, typedefs introduced in 4.3BSD for user and group ID's have been used.
+The \fIucred\fP structure is thus:
+.DS
+.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+/*
+ * Credentials.
+ */
+struct ucred {
+ u_short cr_ref; /* reference count */
+ uid_t cr_uid; /* effective user id */
+ short cr_ngroups; /* number of groups */
+ gid_t cr_groups[NGROUPS]; /* groups */
+ /*
+ * The following either should not be here,
+ * or should be treated as opaque.
+ */
+ uid_t cr_ruid; /* real user id */
+ gid_t cr_svgid; /* saved set-group id */
+};
+.DE
+.PP
+A final structure used by the filesystem interface is the \fIuio\fP
+structure mentioned earlier.
+This structure describes the source or destination of an I/O
+operation, with provision for scatter/gather I/O.
+It is used in the read and write entries to the filesystem.
+The \fIuio\fP structure presented here is modified from the one
+used in 4.2BSD to specify the location of each vector of the operation
+(user or kernel space)
+and to allow an alternate function to be used to implement the data movement.
+The alternate function might perform page remapping rather than a copy,
+for example.
+.DS
+.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+/*
+ * Description of an I/O operation which potentially
+ * involves scatter-gather, with individual sections
+ * described by iovec, below. uio_resid is initially
+ * set to the total size of the operation, and is
+ * decremented as the operation proceeds. uio_offset
+ * is incremented by the amount of each operation.
+ * uio_iov is incremented and uio_iovcnt is decremented
+ * after each vector is processed.
+ */
+struct uio {
+ struct iovec *uio_iov;
+ int uio_iovcnt;
+ off_t uio_offset;
+ int uio_resid;
+ enum uio_rw uio_rw;
+};
+
+enum uio_rw { UIO_READ, UIO_WRITE };
+.DE
+.DS
+.ta .5i +\w'caddr_t\0\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+/*
+ * Description of a contiguous section of an I/O operation.
+ * If iov_op is non-null, it is called to implement the copy
+ * operation, possibly by remapping, with the call
+ * (*iov_op)(from, to, count);
+ * where from and to are caddr_t and count is int.
+ * Otherwise, the copy is done in the normal way,
+ * treating base as a user or kernel virtual address
+ * according to iov_segflg.
+ */
+struct iovec {
+ caddr_t iov_base;
+ int iov_len;
+ enum uio_seg iov_segflg;
+ int (*iov_op)();
+};
+.DE
+.DS
+.ta .5i +\w'UIO_USERISPACE\0\0\0\0\0'u
+/*
+ * Segment flag values.
+ */
+enum uio_seg {
+ UIO_USERSPACE, /* from user data space */
+ UIO_SYSSPACE, /* from system space */
+ UIO_USERISPACE /* from user I space */
+};
+.DE
+.SH
+File and filesystem operations
+.PP
+With the introduction of the data structures used by the filesystem
+operations, the complete list of filesystem entry points may be listed.
+As noted, they derive mostly from the Sun VFS interface.
+Lines marked with \fB+\fP are additions to the Sun definitions;
+lines marked with \fB!\fP are modified from VFS.
+.PP
+The structure describing the externally-visible features of a mounted
+filesystem, \fIvfs\fP, is:
+.DS
+.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
+/*
+ * Structure per mounted file system.
+ * Each mounted file system has an array of
+ * operations and an instance record.
+ * The file systems are put on a doubly linked list.
+ */
+struct vfs {
+ struct vfs *vfs_next; /* next vfs in vfs list */
+\fB+\fP struct vfs *vfs_prev; /* prev vfs in vfs list */
+ struct vfsops *vfs_op; /* operations on vfs */
+ struct vnode *vfs_vnodecovered; /* vnode we mounted on */
+ int vfs_flag; /* flags */
+\fB!\fP int vfs_fsize; /* fundamental block size */
+\fB+\fP int vfs_bsize; /* optimal transfer size */
+\fB!\fP uid_t vfs_exroot; /* exported fs uid 0 mapping */
+ short vfs_exflags; /* exported fs flags */
+ caddr_t vfs_data; /* private data */
+};
+.DE
+.DS
+.ta \w'\fB+\fP 'u +\w'#define\0\0'u +\w'VFS_EXPORTED\0\0'u +\w'0x40\0\0\0\0\0'u
+ /*
+ * vfs flags.
+ * VFS_MLOCK lock the vfs so that name lookup cannot proceed past the vfs.
+ * This keeps the subtree stable during mounts and unmounts.
+ */
+ #define VFS_RDONLY 0x01 /* read only vfs */
+\fB+\fP #define VFS_NOEXEC 0x02 /* can't exec from filesystem */
+ #define VFS_MLOCK 0x04 /* lock vfs so that subtree is stable */
+ #define VFS_MWAIT 0x08 /* someone is waiting for lock */
+ #define VFS_NOSUID 0x10 /* don't honor setuid bits on vfs */
+ #define VFS_EXPORTED 0x20 /* file system is exported (NFS) */
+
+ /*
+ * exported vfs flags.
+ */
+ #define EX_RDONLY 0x01 /* exported read only */
+.DE
+.LP
+The operations supported by the filesystem-specific layer
+on an individual filesystem are:
+.DS
+.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
+/*
+ * Operations supported on virtual file system.
+ */
+struct vfsops {
+\fB!\fP int (*vfs_mount)( /* vfs, path, data, datalen */ );
+\fB!\fP int (*vfs_unmount)( /* vfs, forcibly */ );
+\fB+\fP int (*vfs_mountroot)();
+ int (*vfs_root)( /* vfs, vpp */ );
+\fB!\fP int (*vfs_statfs)( /* vfs, vp, sbp */ );
+\fB!\fP int (*vfs_sync)( /* vfs, waitfor */ );
+\fB+\fP int (*vfs_fhtovp)( /* vfs, fhp, vpp */ );
+\fB+\fP int (*vfs_vptofh)( /* vp, fhp */ );
+};
+.DE
+.LP
+The \fIvfs_statfs\fP entry returns a structure of the form:
+.DS
+.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
+/*
+ * file system statistics
+ */
+struct statfs {
+\fB!\fP short f_type; /* type of filesystem */
+\fB+\fP short f_flags; /* copy of vfs (mount) flags */
+\fB!\fP long f_fsize; /* fundamental file system block size */
+\fB+\fP long f_bsize; /* optimal transfer block size */
+ long f_blocks; /* total data blocks in file system */
+ long f_bfree; /* free blocks in fs */
+ long f_bavail; /* free blocks avail to non-superuser */
+ long f_files; /* total file nodes in file system */
+ long f_ffree; /* free file nodes in fs */
+ fsid_t f_fsid; /* file system id */
+\fB+\fP char *f_mntonname; /* directory on which mounted */
+\fB+\fP char *f_mntfromname; /* mounted filesystem */
+ long f_spare[7]; /* spare for later */
+};
+
+typedef long fsid_t[2]; /* file system id type */
+.DE
+.LP
+The modifications to Sun's interface at this level are minor.
+Additional arguments are present for the \fIvfs_mount\fP and \fIvfs_unmount\fP
+entries.
+\fIvfs_statfs\fP accepts a vnode as well as filesystem identifier,
+as the information may not be uniform throughout a filesystem.
+For example,
+if a client may mount a file tree that spans multiple physical
+filesystems on a server, different sections may have different amounts
+of free space.
+(NFS does not allow remotely-mounted file trees to span physical filesystems
+on the server.)
+The final additions are the entries that support file handles.
+\fIvfs_vptofh\fP is provided for the use of file servers,
+which need to obtain an opaque
+file handle to represent the current vnode for transmission to clients.
+This file handle may later be used to relocate the vnode using \fIvfs_fhtovp\fP
+without requiring the vnode to remain in memory.
+.PP
+Finally, the external form of a filesystem object, the \fIvnode\fP, is:
+.DS
+.ta .5i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
+/*
+ * vnode types. VNON means no type.
+ */
+enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK };
+
+struct vnode {
+ u_short v_flag; /* vnode flags (see below) */
+ u_short v_count; /* reference count */
+ u_short v_shlockc; /* count of shared locks */
+ u_short v_exlockc; /* count of exclusive locks */
+ struct vfs *v_vfsmountedhere; /* ptr to vfs mounted here */
+ struct vfs *v_vfsp; /* ptr to vfs we are in */
+ struct vnodeops *v_op; /* vnode operations */
+\fB+\fP struct text *v_text; /* text/mapped region */
+ enum vtype v_type; /* vnode type */
+ caddr_t v_data; /* private data for fs */
+};
+.DE
+.DS
+.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
+/*
+ * vnode flags.
+ */
+#define VROOT 0x01 /* root of its file system */
+#define VTEXT 0x02 /* vnode is a pure text prototype */
+#define VEXLOCK 0x10 /* exclusive lock */
+#define VSHLOCK 0x20 /* shared lock */
+#define VLWAIT 0x40 /* proc is waiting on shared or excl. lock */
+.DE
+.LP
+The operations supported by the filesystems on individual \fIvnode\fP\^s
+are:
+.DS
+.ta .5i +\w'int\0\0\0\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
+/*
+ * Operations on vnodes.
+ */
+struct vnodeops {
+\fB!\fP int (*vn_lookup)( /* ndp */ );
+\fB!\fP int (*vn_create)( /* ndp, vap, fflags */ );
+\fB+\fP int (*vn_mknod)( /* ndp, vap, fflags */ );
+\fB!\fP int (*vn_open)( /* vp, fflags, cred */ );
+ int (*vn_close)( /* vp, fflags, cred */ );
+ int (*vn_access)( /* vp, fflags, cred */ );
+ int (*vn_getattr)( /* vp, vap, cred */ );
+ int (*vn_setattr)( /* vp, vap, cred */ );
+
+\fB+\fP int (*vn_read)( /* vp, uiop, offp, ioflag, cred */ );
+\fB+\fP int (*vn_write)( /* vp, uiop, offp, ioflag, cred */ );
+\fB!\fP int (*vn_ioctl)( /* vp, com, data, fflag, cred */ );
+ int (*vn_select)( /* vp, which, cred */ );
+\fB+\fP int (*vn_mmap)( /* vp, ..., cred */ );
+ int (*vn_fsync)( /* vp, cred */ );
+\fB+\fP int (*vn_seek)( /* vp, offp, off, whence */ );
+
+\fB!\fP int (*vn_remove)( /* ndp */ );
+\fB!\fP int (*vn_link)( /* vp, ndp */ );
+\fB!\fP int (*vn_rename)( /* src ndp, target ndp */ );
+\fB!\fP int (*vn_mkdir)( /* ndp, vap */ );
+\fB!\fP int (*vn_rmdir)( /* ndp */ );
+\fB!\fP int (*vn_symlink)( /* ndp, vap, nm */ );
+ int (*vn_readdir)( /* vp, uiop, offp, ioflag, cred */ );
+ int (*vn_readlink)( /* vp, uiop, ioflag, cred */ );
+
+\fB+\fP int (*vn_abortop)( /* ndp */ );
+\fB+\fP int (*vn_lock)( /* vp */ );
+\fB+\fP int (*vn_unlock)( /* vp */ );
+\fB!\fP int (*vn_inactive)( /* vp */ );
+};
+.DE
+.DS
+.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0'u
+/*
+ * flags for ioflag
+ */
+#define IO_UNIT 0x01 /* do io as atomic unit for VOP_RDWR */
+#define IO_APPEND 0x02 /* append write for VOP_RDWR */
+#define IO_SYNC 0x04 /* sync io for VOP_RDWR */
+.DE
+.LP
+The argument types listed in the comments following each operation are:
+.sp
+.IP ndp 10
+A pointer to a \fInameidata\fP structure.
+.IP vap
+A pointer to a \fIvattr\fP structure (vnode attributes; see below).
+.IP fflags
+File open flags, possibly including O_APPEND, O_CREAT, O_TRUNC and O_EXCL.
+.IP vp
+A pointer to a \fIvnode\fP previously obtained with \fIvn_lookup\fP.
+.IP cred
+A pointer to a \fIucred\fP credentials structure.
+.IP uiop
+A pointer to a \fIuio\fP structure.
+.IP ioflag
+Any of the IO flags defined above.
+.IP com
+An \fIioctl\fP command, with type \fIunsigned long\fP.
+.IP data
+A pointer to a character buffer used to pass data to or from an \fIioctl\fP.
+.IP which
+One of FREAD, FWRITE or 0 (select for exceptional conditions).
+.IP off
+A file offset of type \fIoff_t\fP.
+.IP offp
+A pointer to a file offset of type \fIoff_t\fP.
+.IP whence
+One of L_SET, L_INCR, or L_XTND.
+.IP fhp
+A pointer to a file handle buffer.
+.sp
+.PP
+Several changes have been made to Sun's set of vnode operations.
+Most obviously, the \fIvn_lookup\fP receives a \fInameidata\fP structure
+containing its arguments and context as described.
+The same structure is also passed to one of the creation or deletion
+entries if the lookup operation is for CREATE or DELETE to complete
+an operation, or to the \fIvn_abortop\fP entry if no operation
+is undertaken.
+For filesystems that perform no locking between lookup for creation
+or deletion and the call to implement that action,
+the final pathname component may be left untranslated by the lookup
+routine.
+In any case, the pathname pointer points at the final name component,
+and the \fInameidata\fP contains a reference to the vnode of the parent
+directory.
+The interface is thus flexible enough to accommodate filesystems
+that are fully stateful or fully stateless, while avoiding redundant
+operations whenever possible.
+One operation remains problematical, the \fIvn_rename\fP call.
+It is tempting to look up the source of the rename for deletion
+and the target for creation.
+However, filesystems that lock directories during such lookups must avoid
+deadlock if the two paths cross.
+For that reason, the source is translated for LOOKUP only,
+with the WANTPARENT flag set;
+the target is then translated with an operation of CREATE.
+.PP
+In addition to the changes concerned with the \fInameidata\fP interface,
+several other changes were made in the vnode operations.
+The \fIvn_rdwr\fP entry was split into \fIvn_read\fP and \fIvn_write\fP;
+frequently, the read/write entry amounts to a routine that checks
+the direction flag, then calls either a read routine or a write routine.
+The two entries may be identical for any given filesystem;
+the direction flag is contained in the \fIuio\fP given as an argument.
+.PP
+All of the read and write operations use a \fIuio\fP to describe
+the file offset and buffer locations.
+All of these fields must be updated before return.
+In particular, the \fIvn_readdir\fP entry uses this
+to return a new file offset token for its current location.
+.PP
+Several new operations have been added.
+The first, \fIvn_seek\fP, is a concession to record-oriented files
+such as directories.
+It allows the filesystem to verify that a seek leaves a file at a sensible
+offset, or to return a new offset token relative to an earlier one.
+For most filesystems and files, this operation amounts to performing
+simple arithmetic.
+Another new entry point is \fIvn_mmap\fP, for use in mapping device memory
+into a user process address space.
+Its semantics are not yet decided.
+The final additions are the \fIvn_lock\fP and \fIvn_unlock\fP entries.
+These are used to request that the underlying file be locked against
+changes for short periods of time if the filesystem implementation allows it.
+They are used to maintain consistency
+during internal operations such as \fIexec\fP,
+and may not be used to construct atomic operations from other filesystem
+operations.
+.PP
+The attributes of a vnode are not stored in the vnode,
+as they might change with time and may need to be read from a remote
+source.
+Attributes have the form:
+.DS
+.ta .5i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
+/*
+ * Vnode attributes. A field value of -1
+ * represents a field whose value is unavailable
+ * (getattr) or which is not to be changed (setattr).
+ */
+struct vattr {
+ enum vtype va_type; /* vnode type (for create) */
+ u_short va_mode; /* files access mode and type */
+\fB!\fP uid_t va_uid; /* owner user id */
+\fB!\fP gid_t va_gid; /* owner group id */
+ long va_fsid; /* file system id (dev for now) */
+\fB!\fP long va_fileid; /* file id */
+ short va_nlink; /* number of references to file */
+ u_long va_size; /* file size in bytes (quad?) */
+\fB+\fP u_long va_size1; /* reserved if not quad */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct timeval va_atime; /* time of last access */
+ struct timeval va_mtime; /* time of last modification */
+ struct timeval va_ctime; /* time file changed */
+ dev_t va_rdev; /* device the file represents */
+ u_long va_bytes; /* bytes of disk space held by file */
+\fB+\fP u_long va_bytes1; /* reserved if va_bytes not a quad */
+};
+.DE
+.SH
+Conclusions
+.PP
+The Sun VFS filesystem interface is the most widely used generic
+filesystem interface.
+Of the interfaces examined, it creates the cleanest separation
+between the filesystem-independent and -dependent layers and data structures.
+It has several flaws, but it is felt that certain changes in the interface
+can ameliorate most of them.
+The interface proposed here includes those changes.
+The proposed interface is now being implemented by the Computer Systems
+Research Group at Berkeley.
+If the design succeeds in improving the flexibility and performance
+of the filesystem layering, it will be advanced as a model interface.
+.SH
+Acknowledgements
+.PP
+The filesystem interface described here is derived from Sun's VFS interface.
+It also includes features similar to those of DEC's GFS interface.
+We are indebted to members of the Sun and DEC system groups
+for long discussions of the issues involved.
+.br
+.ne 2i
+.SH
+References
+
+.IP Brownbridge82 \w'Satyanarayanan85\0\0'u
+Brownbridge, D.R., L.F. Marshall, B. Randell,
+``The Newcastle Connection, or UNIXes of the World Unite!,''
+\fISoftware\(emPractice and Experience\fP, Vol. 12, pp. 1147-1162, 1982.
+
+.IP Cole85
+Cole, C.T., P.B. Flinn, A.B. Atlas,
+``An Implementation of an Extended File System for UNIX,''
+\fIUsenix Conference Proceedings\fP,
+pp. 131-150, June, 1985.
+
+.IP Kleiman86
+Kleiman, S.,
+``Vnodes: An Architecture for Multiple File System Types in Sun UNIX,''
+\fIUsenix Conference Proceedings\fP,
+pp. 238-247, June, 1986.
+
+.IP Leffler84
+Leffler, S., M.K. McKusick, M. Karels,
+``Measuring and Improving the Performance of 4.2BSD,''
+\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
+
+.IP McKusick84
+McKusick, M.K., W.N. Joy, S.J. Leffler, R.S. Fabry,
+``A Fast File System for UNIX,'' \fITransactions on Computer Systems\fP,
+Vol. 2, pp. 181-197,
+ACM, August, 1984.
+
+.IP McKusick85
+McKusick, M.K., M. Karels, S. Leffler,
+``Performance Improvements and Functional Enhancements in 4.3BSD,''
+\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
+
+.IP Rifkin86
+Rifkin, A.P., M.P. Forbes, R.L. Hamilton, M. Sabrio, S. Shah, and K. Yueh,
+``RFS Architectural Overview,'' \fIUsenix Conference Proceedings\fP,
+pp. 248-259, June, 1986.
+
+.IP Ritchie74
+Ritchie, D.M. and K. Thompson, ``The Unix Time-Sharing System,''
+\fICommunications of the ACM\fP, Vol. 17, pp. 365-375, July, 1974.
+
+.IP Rodriguez86
+Rodriguez, R., M. Koehler, R. Hyde,
+``The Generic File System,'' \fIUsenix Conference Proceedings\fP,
+pp. 260-269, June, 1986.
+
+.IP Sandberg85
+Sandberg, R., D. Goldberg, S. Kleiman, D. Walsh, B. Lyon,
+``Design and Implementation of the Sun Network Filesystem,''
+\fIUsenix Conference Proceedings\fP,
+pp. 119-130, June, 1985.
+
+.IP Satyanarayanan85
+Satyanarayanan, M., \fIet al.\fP,
+``The ITC Distributed File System: Principles and Design,''
+\fIProc. 10th Symposium on Operating Systems Principles\fP, pp. 35-50,
+ACM, December, 1985.
+
+.IP Walker85
+Walker, B.J. and S.H. Kiser, ``The LOCUS Distributed Filesystem,''
+\fIThe LOCUS Distributed System Architecture\fP,
+G.J. Popek and B.J. Walker, ed., The MIT Press, Cambridge, MA, 1985.
+
+.IP Weinberger84
+Weinberger, P.J., ``The Version 8 Network File System,''
+\fIUsenix Conference presentation\fP,
+June, 1984.
diff --git a/share/doc/papers/fsinterface/slides.t b/share/doc/papers/fsinterface/slides.t
new file mode 100644
index 0000000..3caaafb
--- /dev/null
+++ b/share/doc/papers/fsinterface/slides.t
@@ -0,0 +1,318 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)slides.t 5.2 (Berkeley) 4/16/91
+.\"
+.so macros
+.nf
+.LL
+Encapsulation of namei parameters
+.NP 0
+.ta .5i +\w'caddr_t\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+struct nameidata {
+ /* arguments and context: */
+ caddr_t ni_dirp;
+ enum uio_seg ni_seg;
+ short ni_nameiop;
+ struct vnode *ni_cdir;
+ struct vnode *ni_rdir;
+ struct ucred *ni_cred;
+.sp .2
+ /* shared with lookup and commit: */
+ caddr_t ni_pnbuf;
+ char *ni_ptr;
+ int ni_pathlen;
+ short ni_more;
+ short ni_loopcnt;
+.sp .2
+ /* results: */
+ struct vnode *ni_vp;
+ struct vnode *ni_dvp;
+.sp .2
+/* BEGIN UFS SPECIFIC */
+ struct diroffcache {
+ struct vnode *nc_prevdir;
+ long nc_id;
+ off_t nc_prevoffset;
+ } ni_nc;
+/* END UFS SPECIFIC */
+};
+.bp
+
+
+.LL
+Namei operations and modifiers
+
+.NP 0
+.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
+#define LOOKUP 0 /* name lookup only */
+#define CREATE 1 /* setup for creation */
+#define DELETE 2 /* setup for deletion */
+#define WANTPARENT 0x10 /* return parent vnode also */
+#define NOCACHE 0x20 /* remove name from cache */
+#define FOLLOW 0x40 /* follow symbolic links */
+.bp
+
+.LL
+Namei operations and modifiers
+
+.NP 0
+.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
+#define LOOKUP 0
+#define CREATE 1
+#define DELETE 2
+#define WANTPARENT 0x10
+#define NOCACHE 0x20
+#define FOLLOW 0x40
+.bp
+
+
+.LL
+Credentials
+
+.NP 0
+.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+struct ucred {
+ u_short cr_ref;
+ uid_t cr_uid;
+ short cr_ngroups;
+ gid_t cr_groups[NGROUPS];
+ /*
+ * The following either should not be here,
+ * or should be treated as opaque.
+ */
+ uid_t cr_ruid;
+ gid_t cr_svgid;
+};
+.bp
+.LL
+Scatter-gather I/O
+.NP 0
+.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+struct uio {
+ struct iovec *uio_iov;
+ int uio_iovcnt;
+ off_t uio_offset;
+ int uio_resid;
+ enum uio_rw uio_rw;
+};
+
+enum uio_rw { UIO_READ, UIO_WRITE };
+
+
+
+.ta .5i +\w'caddr_t\0\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
+struct iovec {
+ caddr_t iov_base;
+ int iov_len;
+ enum uio_seg iov_segflg;
+ int (*iov_op)();
+};
+.bp
+.LL
+Per-filesystem information
+.NP 0
+.ta .25i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
+struct vfs {
+ struct vfs *vfs_next;
+\fB+\fP struct vfs *vfs_prev;
+ struct vfsops *vfs_op;
+ struct vnode *vfs_vnodecovered;
+ int vfs_flag;
+\fB!\fP int vfs_fsize;
+\fB+\fP int vfs_bsize;
+\fB!\fP uid_t vfs_exroot;
+ short vfs_exflags;
+ caddr_t vfs_data;
+};
+
+.NP 0
+.ta \w'\fB+\fP 'u +\w'#define\0\0'u +\w'VFS_EXPORTED\0\0'u +\w'0x40\0\0\0\0\0'u
+ /* vfs flags: */
+ #define VFS_RDONLY 0x01
+\fB+\fP #define VFS_NOEXEC 0x02
+ #define VFS_MLOCK 0x04
+ #define VFS_MWAIT 0x08
+ #define VFS_NOSUID 0x10
+ #define VFS_EXPORTED 0x20
+
+ /* exported vfs flags: */
+ #define EX_RDONLY 0x01
+.bp
+
+
+.LL
+Operations supported on virtual file system.
+
+.NP 0
+.ta .25i +\w'int\0\0'u +\w'*vfs_mountroot();\0'u
+struct vfsops {
+\fB!\fP int (*vfs_mount)(vfs, path, data, len);
+\fB!\fP int (*vfs_unmount)(vfs, forcibly);
+\fB+\fP int (*vfs_mountroot)();
+ int (*vfs_root)(vfs, vpp);
+ int (*vfs_statfs)(vfs, sbp);
+\fB!\fP int (*vfs_sync)(vfs, waitfor);
+\fB+\fP int (*vfs_fhtovp)(vfs, fhp, vpp);
+\fB+\fP int (*vfs_vptofh)(vp, fhp);
+};
+.bp
+
+
+.LL
+Dynamic file system information
+
+.NP 0
+.ta .5i +\w'struct\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
+struct statfs {
+\fB!\fP short f_type;
+\fB+\fP short f_flags;
+\fB!\fP long f_fsize;
+\fB+\fP long f_bsize;
+ long f_blocks;
+ long f_bfree;
+ long f_bavail;
+ long f_files;
+ long f_ffree;
+ fsid_t f_fsid;
+\fB+\fP char *f_mntonname;
+\fB+\fP char *f_mntfromname;
+ long f_spare[7];
+};
+
+typedef long fsid_t[2];
+.bp
+.LL
+Filesystem objects (vnodes)
+.NP 0
+.ta .25i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
+enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK };
+
+struct vnode {
+ u_short v_flag;
+ u_short v_count;
+ u_short v_shlockc;
+ u_short v_exlockc;
+ struct vfs *v_vfsmountedhere;
+ struct vfs *v_vfsp;
+ struct vnodeops *v_op;
+\fB+\fP struct text *v_text;
+ enum vtype v_type;
+ caddr_t v_data;
+};
+.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
+
+/* vnode flags */
+#define VROOT 0x01
+#define VTEXT 0x02
+#define VEXLOCK 0x10
+#define VSHLOCK 0x20
+#define VLWAIT 0x40
+.bp
+.LL
+Operations on vnodes
+
+.NP 0
+.ta .25i +\w'int\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
+struct vnodeops {
+\fB!\fP int (*vn_lookup)(ndp);
+\fB!\fP int (*vn_create)(ndp, vap, fflags);
+\fB+\fP int (*vn_mknod)(ndp, vap, fflags);
+\fB!\fP int (*vn_open)(vp, fflags, cred);
+ int (*vn_close)(vp, fflags, cred);
+ int (*vn_access)(vp, fflags, cred);
+ int (*vn_getattr)(vp, vap, cred);
+ int (*vn_setattr)(vp, vap, cred);
+.sp .5
+\fB+\fP int (*vn_read)(vp, uiop,
+ offp, ioflag, cred);
+\fB+\fP int (*vn_write)(vp, uiop,
+ offp, ioflag, cred);
+\fB!\fP int (*vn_ioctl)(vp, com,
+ data, fflag, cred);
+ int (*vn_select)(vp, which, cred);
+\fB+\fP int (*vn_mmap)(vp, ..., cred);
+ int (*vn_fsync)(vp, cred);
+\fB+\fP int (*vn_seek)(vp, offp, off,
+ whence);
+.bp
+.LL
+Operations on vnodes (cont)
+
+.NP 0
+.ta .25i +\w'int\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
+
+\fB!\fP int (*vn_remove)(ndp);
+\fB!\fP int (*vn_link)(vp, ndp);
+\fB!\fP int (*vn_rename)(sndp, tndp);
+\fB!\fP int (*vn_mkdir)(ndp, vap);
+\fB!\fP int (*vn_rmdir)(ndp);
+\fB!\fP int (*vn_symlink)(ndp, vap, nm);
+\fB!\fP int (*vn_readdir)(vp, uiop,
+ offp, ioflag, cred);
+\fB!\fP int (*vn_readlink)(vp, uiop,
+ offp, ioflag, cred);
+.sp .5
+\fB+\fP int (*vn_abortop)(ndp);
+\fB!\fP int (*vn_inactive)(vp);
+};
+
+.NP 0
+.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0'u
+/* flags for ioflag */
+#define IO_UNIT 0x01
+#define IO_APPEND 0x02
+#define IO_SYNC 0x04
+.bp
+
+.LL
+Vnode attributes
+
+.NP 0
+.ta .5i +\w'struct timeval\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
+struct vattr {
+ enum vtype va_type;
+ u_short va_mode;
+\fB!\fP uid_t va_uid;
+\fB!\fP gid_t va_gid;
+ long va_fsid;
+\fB!\fP long va_fileid;
+ short va_nlink;
+ u_long va_size;
+\fB+\fP u_long va_size1;
+ long va_blocksize;
+ struct timeval va_atime;
+ struct timeval va_mtime;
+ struct timeval va_ctime;
+ dev_t va_rdev;
+\fB!\fP u_long va_bytes;
+\fB+\fP u_long va_bytes1;
+};
diff --git a/share/doc/papers/kernmalloc/Makefile b/share/doc/papers/kernmalloc/Makefile
new file mode 100644
index 0000000..8966f36
--- /dev/null
+++ b/share/doc/papers/kernmalloc/Makefile
@@ -0,0 +1,11 @@
+# @(#)Makefile 1.8 (Berkeley) 6/8/93
+
+DIR= papers/kernmalloc
+SRCS= kernmalloc.t appendix.t
+MACROS= -ms
+
+paper.ps: ${SRCS} alloc.fig usage.tbl
+ ${SOELIM} ${SRCS} | ${TBL} | ${PIC} | ${EQN} | ${GRIND} | \
+ ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/kernmalloc/alloc.fig b/share/doc/papers/kernmalloc/alloc.fig
new file mode 100644
index 0000000..1ef260b
--- /dev/null
+++ b/share/doc/papers/kernmalloc/alloc.fig
@@ -0,0 +1,115 @@
+.\" Copyright (c) 1988 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)alloc.fig 5.1 (Berkeley) 4/16/91
+.\"
+.PS
+scale=100
+define m0 |
+[ box invis ht 16 wid 32 with .sw at 0,0
+line from 4,12 to 4,4
+line from 8,12 to 8,4
+line from 12,12 to 12,4
+line from 16,12 to 16,4
+line from 20,12 to 20,4
+line from 24,12 to 24,4
+line from 28,12 to 28,4
+line from 0,16 to 0,0
+line from 0,8 to 32,8
+] |
+
+define m1 |
+[ box invis ht 16 wid 32 with .sw at 0,0
+line from 8,12 to 8,4
+line from 16,12 to 16,4
+line from 24,12 to 24,4
+line from 0,8 to 32,8
+line from 0,16 to 0,0
+] |
+
+define m2 |
+[ box invis ht 16 wid 32 with .sw at 0,0
+line from 0,8 to 32,8
+line from 0,16 to 0,0
+] |
+
+define m3 |
+[ box invis ht 16 wid 31 with .sw at 0,0
+line from 15,12 to 15,4
+line from 0,8 to 31,8
+line from 0,16 to 0,0
+] |
+
+box invis ht 212 wid 580 with .sw at 0,0
+"\f1\s10\&kernel memory pages\f1\s0" at 168,204
+"\f1\s10\&Legend:\f1\s0" at 36,144
+"\f1\s10\&cont \- continuation of previous page\f1\s0" at 28,112 ljust
+"\f1\s10\&free \- unused page\f1\s0" at 28,128 ljust
+"\f1\s10\&Usage:\f1\s0" at 34,87
+"\f1\s10\&memsize(addr)\f1\s0" at 36,71 ljust
+"\f1\s10\&char *addr;\f1\s0" at 66,56 ljust
+"\f1\s10\&{\f1\s0" at 36,43 ljust
+"\f1\s10\&return(kmemsizes[(addr \- kmembase) / \s-1PAGESIZE\s+1]);\f1" at 66,29 ljust
+"\f1\s10\&}\f1\s0" at 36,8 ljust
+line from 548,192 to 548,176
+line from 548,184 to 580,184 dotted
+"\f1\s10\&1024,\f1\s0" at 116,168
+"\f1\s10\&256,\f1\s0" at 148,168
+"\f1\s10\&512,\f1\s0" at 180,168
+"\f1\s10\&3072,\f1\s0" at 212,168
+"\f1\s10\&cont,\f1\s0" at 276,168
+"\f1\s10\&cont,\f1\s0" at 244,168
+"\f1\s10\&128,\f1\s0" at 308,168
+"\f1\s10\&128,\f1\s0" at 340,168
+"\f1\s10\&free,\f1\s0" at 372,168
+"\f1\s10\&cont,\f1\s0" at 404,168
+"\f1\s10\&128,\f1\s0" at 436,168
+"\f1\s10\&1024,\f1\s0" at 468,168
+"\f1\s10\&free,\f1\s0" at 500,168
+"\f1\s10\&cont,\f1\s0" at 532,168
+"\f1\s10\&cont,\f1\s0" at 564,168
+m2 with .nw at 100,192
+m1 with .nw at 132,192
+m3 with .nw at 164,192
+m2 with .nw at 196,192
+m2 with .nw at 228,192
+m2 with .nw at 260,192
+m0 with .nw at 292,192
+m0 with .nw at 324,192
+m2 with .nw at 356,192
+m2 with .nw at 388,192
+m0 with .nw at 420,192
+m2 with .nw at 452,192
+m2 with .nw at 484,192
+m2 with .nw at 516,192
+"\f1\s10\&kmemsizes[] = {\f1\s0" at 100,168 rjust
+"\f1\s10\&char *kmembase\f1\s0" at 97,184 rjust
+.PE
diff --git a/share/doc/papers/kernmalloc/appendix.t b/share/doc/papers/kernmalloc/appendix.t
new file mode 100644
index 0000000..bcd3e8c
--- /dev/null
+++ b/share/doc/papers/kernmalloc/appendix.t
@@ -0,0 +1,137 @@
+.\" Copyright (c) 1988 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)appendix.t 5.1 (Berkeley) 4/16/91
+.\"
+.bp
+.H 1 "Appendix A - Implementation Details"
+.LP
+.nf
+.vS
+/*
+ * Constants for setting the parameters of the kernel memory allocator.
+ *
+ * 2 ** MINBUCKET is the smallest unit of memory that will be
+ * allocated. It must be at least large enough to hold a pointer.
+ *
+ * Units of memory less than or equal to MAXALLOCSAVE will permanently
+ * allocate physical memory; requests for these size pieces of memory
+ * are quite fast. Allocations greater than MAXALLOCSAVE must
+ * always allocate and free physical memory; requests for these size
+ * allocations should be done infrequently as they will be slow.
+ * Constraints: CLBYTES <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14)
+ * and MAXALLOCSAVE must be a power of two.
+ */
+#define MINBUCKET 4 /* 4 => min allocation of 16 bytes */
+#define MAXALLOCSAVE (2 * CLBYTES)
+
+/*
+ * Maximum amount of kernel dynamic memory.
+ * Constraints: must be a multiple of the pagesize.
+ */
+#define MAXKMEM (1024 * PAGESIZE)
+
+/*
+ * Arena for all kernel dynamic memory allocation.
+ * This arena is known to start on a page boundary.
+ */
+extern char kmembase[MAXKMEM];
+
+/*
+ * Array of descriptors that describe the contents of each page
+ */
+struct kmemsizes {
+ short ks_indx; /* bucket index, size of small allocations */
+ u_short ks_pagecnt; /* for large allocations, pages allocated */
+} kmemsizes[MAXKMEM / PAGESIZE];
+
+/*
+ * Set of buckets for each size of memory block that is retained
+ */
+struct kmembuckets {
+ caddr_t kb_next; /* list of free blocks */
+} bucket[MINBUCKET + 16];
+.bp
+/*
+ * Macro to convert a size to a bucket index. If the size is constant,
+ * this macro reduces to a compile time constant.
+ */
+#define MINALLOCSIZE (1 << MINBUCKET)
+#define BUCKETINDX(size) \
+ (size) <= (MINALLOCSIZE * 128) \
+ ? (size) <= (MINALLOCSIZE * 8) \
+ ? (size) <= (MINALLOCSIZE * 2) \
+ ? (size) <= (MINALLOCSIZE * 1) \
+ ? (MINBUCKET + 0) \
+ : (MINBUCKET + 1) \
+ : (size) <= (MINALLOCSIZE * 4) \
+ ? (MINBUCKET + 2) \
+ : (MINBUCKET + 3) \
+ : (size) <= (MINALLOCSIZE* 32) \
+ ? (size) <= (MINALLOCSIZE * 16) \
+ ? (MINBUCKET + 4) \
+ : (MINBUCKET + 5) \
+ : (size) <= (MINALLOCSIZE * 64) \
+ ? (MINBUCKET + 6) \
+ : (MINBUCKET + 7) \
+ : (size) <= (MINALLOCSIZE * 2048) \
+ /* etc ... */
+
+/*
+ * Macro versions for the usual cases of malloc/free
+ */
+#define MALLOC(space, cast, size, flags) { \
+ register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \
+ long s = splimp(); \
+ if (kbp->kb_next == NULL) { \
+ (space) = (cast)malloc(size, flags); \
+ } else { \
+ (space) = (cast)kbp->kb_next; \
+ kbp->kb_next = *(caddr_t *)(space); \
+ } \
+ splx(s); \
+}
+
+#define FREE(addr) { \
+ register struct kmembuckets *kbp; \
+ register struct kmemsizes *ksp = \
+ &kmemsizes[((addr) - kmembase) / PAGESIZE]; \
+ long s = splimp(); \
+ if (1 << ksp->ks_indx > MAXALLOCSAVE) { \
+ free(addr); \
+ } else { \
+ kbp = &bucket[ksp->ks_indx]; \
+ *(caddr_t *)(addr) = kbp->kb_next; \
+ kbp->kb_next = (caddr_t)(addr); \
+ } \
+ splx(s); \
+}
+.vE
diff --git a/share/doc/papers/kernmalloc/kernmalloc.t b/share/doc/papers/kernmalloc/kernmalloc.t
new file mode 100644
index 0000000..62df1b4
--- /dev/null
+++ b/share/doc/papers/kernmalloc/kernmalloc.t
@@ -0,0 +1,649 @@
+.\" Copyright (c) 1988 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)kernmalloc.t 5.1 (Berkeley) 4/16/91
+.\"
+.\" reference a system routine name
+.de RN
+\fI\\$1\fP\^(\h'1m/24u')\\$2
+..
+.\" reference a header name
+.de H
+.NH \\$1
+\\$2
+..
+.\" begin figure
+.\" .FI "title"
+.nr Fn 0 1
+.de FI
+.ds Lb Figure \\n+(Fn
+.ds Lt \\$1
+.KF
+.DS B
+.nf
+..
+.\"
+.\" end figure
+.de Fe
+.sp .5
+.\" cheat: original indent is stored in \n(OI by .DS B; restore it
+.\" then center legend after .DE rereads and centers the block.
+\\\\.in \\n(OI
+\\\\.ce
+\\\\*(Lb. \\\\*(Lt
+.sp .5
+.DE
+.KE
+.if \nd 'ls 2
+..
+.EQ
+delim $$
+.EN
+.ds CH "
+.pn 295
+.sp
+.rs
+.ps -1
+.sp -1
+.fi
+Reprinted from:
+\fIProceedings of the San Francisco USENIX Conference\fP,
+pp. 295-303, June 1988.
+.ps
+.\".sp |\n(HMu
+.rm CM
+.nr PO 1.25i
+.TL
+Design of a General Purpose Memory Allocator for the 4.3BSD UNIX\(dg Kernel
+.ds LF Summer USENIX '88
+.ds CF "%
+.ds RF San Francisco, June 20-24
+.EH 'Design of a General Purpose Memory ...''McKusick, Karels'
+.OH 'McKusick, Karels''Design of a General Purpose Memory ...'
+.FS
+\(dgUNIX is a registered trademark of AT&T in the US and other countries.
+.FE
+.AU
+Marshall Kirk McKusick
+.AU
+Michael J. Karels
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+The 4.3BSD UNIX kernel uses many memory allocation mechanisms,
+each designed for the particular needs of the utilizing subsystem.
+This paper describes a general purpose dynamic memory allocator
+that can be used by all of the kernel subsystems.
+The design of this allocator takes advantage of known memory usage
+patterns in the UNIX kernel and a hybrid strategy that is time-efficient
+for small allocations and space-efficient for large allocations.
+This allocator replaces the multiple memory allocation interfaces
+with a single easy-to-program interface,
+results in more efficient use of global memory by eliminating
+partitioned and specialized memory pools,
+and is quick enough that no performance loss is observed
+relative to the current implementations.
+The paper concludes with a discussion of our experience in using
+the new memory allocator,
+and directions for future work.
+.AE
+.LP
+.H 1 "Kernel Memory Allocation in 4.3BSD
+.PP
+The 4.3BSD kernel has at least ten different memory allocators.
+Some of them handle large blocks,
+some of them handle small chained data structures,
+and others include information to describe I/O operations.
+Often the allocations are for small pieces of memory that are only
+needed for the duration of a single system call.
+In a user process such short-term
+memory would be allocated on the run-time stack.
+Because the kernel has a limited run-time stack,
+it is not feasible to allocate even moderate blocks of memory on it.
+Consequently, such memory must be allocated through a more dynamic mechanism.
+For example,
+when the system must translate a pathname,
+it must allocate a one kilobyte buffer to hold the name.
+Other blocks of memory must be more persistent than a single system call
+and really have to be allocated from dynamic memory.
+Examples include protocol control blocks that remain throughout
+the duration of the network connection.
+.PP
+Demands for dynamic memory allocation in the kernel have increased
+as more services have been added.
+Each time a new type of memory allocation has been required,
+a specialized memory allocation scheme has been written to handle it.
+Often the new memory allocation scheme has been built on top
+of an older allocator.
+For example, the block device subsystem provides a crude form of
+memory allocation through the allocation of empty buffers [Thompson78].
+The allocation is slow because of the implied semantics of
+finding the oldest buffer, pushing its contents to disk if they are dirty,
+and moving physical memory into or out of the buffer to create
+the requested size.
+To reduce the overhead, a ``new'' memory allocator was built in 4.3BSD
+for name translation that allocates a pool of empty buffers.
+It keeps them on a free list so they can
+be quickly allocated and freed [McKusick85].
+.PP
+This memory allocation method has several drawbacks.
+First, the new allocator can only handle a limited range of sizes.
+Second, it depletes the buffer pool, as it steals memory intended
+to buffer disk blocks to other purposes.
+Finally, it creates yet another interface of
+which the programmer must be aware.
+.PP
+A generalized memory allocator is needed to reduce the complexity
+of writing code inside the kernel.
+Rather than providing many semi-specialized ways of allocating memory,
+the kernel should provide a single general purpose allocator.
+With only a single interface,
+programmers do not need to figure
+out the most appropriate way to allocate memory.
+If a good general purpose allocator is available,
+it helps avoid the syndrome of creating yet another special
+purpose allocator.
+.PP
+To ease the task of understanding how to use it,
+the memory allocator should have an interface similar to the interface
+of the well-known memory allocator provided for
+applications programmers through the C library routines
+.RN malloc
+and
+.RN free .
+Like the C library interface,
+the allocation routine should take a parameter specifying the
+size of memory that is needed.
+The range of sizes for memory requests should not be constrained.
+The free routine should take a pointer to the storage being freed,
+and should not require additional information such as the size
+of the piece of memory being freed.
+.H 1 "Criteria for a Kernel Memory Allocator
+.PP
+The design specification for a kernel memory allocator is similar to,
+but not identical to,
+the design criteria for a user level memory allocator.
+The first criterion for a memory allocator is that it make good use
+of the physical memory.
+Good use of memory is measured by the amount of memory needed to hold
+a set of allocations at any point in time.
+Percentage utilization is expressed as:
+.EQ
+utilization~=~requested over required
+.EN
+Here, ``requested'' is the sum of the memory that has been requested
+and not yet freed.
+``Required'' is the amount of memory that has been
+allocated for the pool from which the requests are filled.
+An allocator requires more memory than requested because of fragmentation
+and a need to have a ready supply of free memory for future requests.
+A perfect memory allocator would have a utilization of 100%.
+In practice,
+having a 50% utilization is considered good [Korn85].
+.PP
+Good memory utilization in the kernel is more important than
+in user processes.
+Because user processes run in virtual memory,
+unused parts of their address space can be paged out.
+Thus pages in the process address space
+that are part of the ``required'' pool that are not
+being ``requested'' need not tie up physical memory.
+Because the kernel is not paged,
+all pages in the ``required'' pool are held by the kernel and
+cannot be used for other purposes.
+To keep the kernel utilization percentage as high as possible,
+it is desirable to release unused memory in the ``required'' pool
+rather than to hold it as is typically done with user processes.
+Because the kernel can directly manipulate its own page maps,
+releasing unused memory is fast;
+a user process must do a system call to release memory.
+.PP
+The most important criterion for a memory allocator is that it be fast.
+Because memory allocation is done frequently,
+a slow memory allocator will degrade the system performance.
+Speed of allocation is more critical when executing in the
+kernel than in user code,
+because the kernel must allocate many data structures that user
+processes can allocate cheaply on their run-time stack.
+In addition, the kernel represents the platform on which all user
+processes run,
+and if it is slow, it will degrade the performance of every process
+that is running.
+.PP
+Another problem with a slow memory allocator is that programmers
+of frequently-used kernel interfaces will feel that they
+cannot afford to use it as their primary memory allocator.
+Instead they will build their own memory allocator on top of the
+original by maintaining their own pool of memory blocks.
+Multiple allocators reduce the efficiency with which memory is used.
+The kernel ends up with many different free lists of memory
+instead of a single free list from which all allocation can be drawn.
+For example,
+consider the case of two subsystems that need memory.
+If they have their own free lists,
+the amount of memory tied up in the two lists will be the
+sum of the greatest amount of memory that each of
+the two subsystems has ever used.
+If they share a free list,
+the amount of memory tied up in the free list may be as low as the
+greatest amount of memory that either subsystem used.
+As the number of subsystems grows,
+the savings from having a single free list grow.
+.H 1 "Existing User-level Implementations
+.PP
+There are many different algorithms and
+implementations of user-level memory allocators.
+A survey of those available on UNIX systems appeared in [Korn85].
+Nearly all of the memory allocators tested made good use of memory,
+though most of them were too slow for use in the kernel.
+The fastest memory allocator in the survey by nearly a factor of two
+was the memory allocator provided on 4.2BSD originally
+written by Chris Kingsley at California Institute of Technology.
+Unfortunately,
+the 4.2BSD memory allocator also wasted twice as much memory
+as its nearest competitor in the survey.
+.PP
+The 4.2BSD user-level memory allocator works by maintaining a set of lists
+that are ordered by increasing powers of two.
+Each list contains a set of memory blocks of its corresponding size.
+To fulfill a memory request,
+the size of the request is rounded up to the next power of two.
+A piece of memory is then removed from the list corresponding
+to the specified power of two and returned to the requester.
+Thus, a request for a block of memory of size 53 returns
+a block from the 64-sized list.
+A typical memory allocation requires a roundup calculation
+followed by a linked list removal.
+Only if the list is empty is a real memory allocation done.
+The free operation is also fast;
+the block of memory is put back onto the list from which it came.
+The correct list is identified by a size indicator stored
+immediately preceding the memory block.
+.H 1 "Considerations Unique to a Kernel Allocator
+.PP
+There are several special conditions that arise when writing a
+memory allocator for the kernel that do not apply to a user process
+memory allocator.
+First, the maximum memory allocation can be determined at
+the time that the machine is booted.
+This number is never more than the amount of physical memory on the machine,
+and is typically much less since a machine with all its
+memory dedicated to the operating system is uninteresting to use.
+Thus, the kernel can statically allocate a set of data structures
+to manage its dynamically allocated memory.
+These data structures never need to be
+expanded to accommodate memory requests;
+yet, if properly designed, they need not be large.
+For a user process, the maximum amount of memory that may be allocated
+is a function of the maximum size of its virtual memory.
+Although it could allocate static data structures to manage
+its entire virtual memory,
+even if they were efficiently encoded they would potentially be huge.
+The other alternative is to allocate data structures as they are needed.
+However, that adds extra complications such as new
+failure modes if it cannot allocate space for additional
+structures and additional mechanisms to link them all together.
+.PP
+Another special condition of the kernel memory allocator is that it
+can control its own address space.
+Unlike user processes that can only grow and shrink their heap at one end,
+the kernel can keep an arena of kernel addresses and allocate
+pieces from that arena which it then populates with physical memory.
+The effect is much the same as a user process that has parts of
+its address space paged out when they are not in use,
+except that the kernel can explicitly control the set of pages
+allocated to its address space.
+The result is that the ``working set'' of pages in use by the
+kernel exactly corresponds to the set of pages that it is really using.
+.FI "One day memory usage on a Berkeley time-sharing machine"
+.so usage.tbl
+.Fe
+.PP
+A final special condition that applies to the kernel is that
+all of the different uses of dynamic memory are known in advance.
+Each one of these uses of dynamic memory can be assigned a type.
+For each type of dynamic memory that is allocated,
+the kernel can provide allocation limits.
+One reason given for having separate allocators is that
+no single allocator could starve the rest of the kernel of all
+its available memory and thus a single runaway
+client could not paralyze the system.
+By putting limits on each type of memory,
+the single general purpose memory allocator can provide the same
+protection against memory starvation.\(dg
+.FS
+\(dgOne might seriously ask the question what good it is if ``only''
+one subsystem within the kernel hangs if it is something like the
+network on a diskless workstation.
+.FE
+.PP
+\*(Lb shows the memory usage of the kernel over a one day period
+on a general timesharing machine at Berkeley.
+The ``In Use'', ``Free'', and ``Mem Use'' fields are instantaneous values;
+the ``Requests'' field is the number of allocations since system startup;
+the ``High Use'' field is the maximum value of
+the ``Mem Use'' field since system startup.
+The figure demonstrates that most
+allocations are for small objects.
+Large allocations occur infrequently,
+and are typically for long-lived objects
+such as buffers to hold the superblock for
+a mounted file system.
+Thus, a memory allocator only needs to be
+fast for small pieces of memory.
+.H 1 "Implementation of the Kernel Memory Allocator
+.PP
+In reviewing the available memory allocators,
+none of their strategies could be used without some modification.
+The kernel memory allocator that we ended up with is a hybrid
+of the fast memory allocator found in the 4.2BSD C library
+and a slower but more-memory-efficient first-fit allocator.
+.PP
+Small allocations are done using the 4.2BSD power-of-two list strategy;
+the typical allocation requires only a computation of
+the list to use and the removal of an element if it is available,
+so it is quite fast.
+Macros are provided to avoid the cost of a subroutine call.
+Only if the request cannot be fulfilled from a list is a call
+made to the allocator itself.
+To ensure that the allocator is always called for large requests,
+the lists corresponding to large allocations are always empty.
+Appendix A shows the data structures and implementation of the macros.
+.PP
+Similarly, freeing a block of memory can be done with a macro.
+The macro computes the list on which to place the request
+and puts it there.
+The free routine is called only if the block of memory is
+considered to be a large allocation.
+Including the cost of blocking out interrupts,
+the allocation and freeing macros generate respectively
+only nine and sixteen (simple) VAX instructions.
+.PP
+Because of the inefficiency of power-of-two allocation strategies
+for large allocations,
+a different strategy is used for allocations larger than two kilobytes.
+The selection of two kilobytes is derived from our statistics on
+the utilization of memory within the kernel,
+that showed that 95 to 98% of allocations are of size one kilobyte or less.
+A frequent caller of the memory allocator
+(the name translation function)
+always requests a one kilobyte block.
+Additionally the allocation method for large blocks is based on allocating
+pieces of memory in multiples of pages.
+Consequently the actual allocation size for requests of size
+$2~times~pagesize$ or less are identical.\(dg
+.FS
+\(dgTo understand why this number is $size 8 {2~times~pagesize}$ one
+observes that the power-of-two algorithm yields sizes of 1, 2, 4, 8, \&...
+pages while the large block algorithm that allocates in multiples
+of pages yields sizes of 1, 2, 3, 4, \&... pages.
+Thus for allocations of sizes between one and two pages
+both algorithms use two pages;
+it is not until allocations of sizes between two and three pages
+that a difference emerges where the power-of-two algorithm will use
+four pages while the large block algorithm will use three pages.
+.FE
+In 4.3BSD on the VAX, the (software) page size is one kilobyte,
+so two kilobytes is the smallest logical cutoff.
+.PP
+Large allocations are first rounded up to be a multiple of the page size.
+The allocator then uses a first-fit algorithm to find space in the
+kernel address arena set aside for dynamic allocations.
+Thus a request for a five kilobyte piece of memory will use exactly
+five pages of memory rather than eight kilobytes as with
+the power-of-two allocation strategy.
+When a large piece of memory is freed,
+the memory pages are returned to the free memory pool,
+and the address space is returned to the kernel address arena
+where it is coalesced with adjacent free pieces.
+.PP
+Another technique to improve both the efficiency of memory utilization
+and the speed of allocation
+is to cluster same-sized small allocations on a page.
+When a list for a power-of-two allocation is empty,
+a new page is allocated and divided into pieces of the needed size.
+This strategy speeds future allocations as several pieces of memory
+become available as a result of the call into the allocator.
+.PP
+.FI "Calculation of allocation size"
+.so alloc.fig
+.Fe
+Because the size is not specified when a block of memory is freed,
+the allocator must keep track of the sizes of the pieces it has handed out.
+The 4.2BSD user-level allocator stores the size of each block
+in a header just before the allocation.
+However, this strategy doubles the memory requirement for allocations that
+require a power-of-two-sized block.
+Therefore,
+instead of storing the size of each piece of memory with the piece itself,
+the size information is associated with the memory page.
+\*(Lb shows how the kernel determines
+the size of a piece of memory that is being freed,
+by calculating the page in which it resides,
+and looking up the size associated with that page.
+Eliminating the cost of the overhead per piece improved utilization
+far more than expected.
+The reason is that many allocations in the kernel are for blocks of
+memory whose size is exactly a power of two.
+These requests would be nearly doubled if the user-level strategy were used.
+Now they can be accommodated with no wasted memory.
+.PP
+The allocator can be called both from the top half of the kernel,
+which is willing to wait for memory to become available,
+and from the interrupt routines in the bottom half of the kernel
+that cannot wait for memory to become available.
+Clients indicate their willingness (and ability) to wait with a flag
+to the allocation routine.
+For clients that are willing to wait,
+the allocator guarantees that their request will succeed.
+Thus, these clients need not check the return value from the allocator.
+If memory is unavailable and the client cannot wait,
+the allocator returns a null pointer.
+These clients must be prepared to cope with this
+(hopefully infrequent) condition
+(usually by giving up and hoping to do better later).
+.H 1 "Results of the Implementation
+.PP
+The new memory allocator was written about a year ago.
+Conversion from the old memory allocators to the new allocator
+has been going on ever since.
+Many of the special purpose allocators have been eliminated.
+This list includes
+.RN calloc ,
+.RN wmemall ,
+and
+.RN zmemall .
+Many of the special purpose memory allocators built on
+top of other allocators have also been eliminated.
+For example, the allocator that was built on top of the buffer pool allocator
+.RN geteblk
+to allocate pathname buffers in
+.RN namei
+has been eliminated.
+Because the typical allocation is so fast,
+we have found that none of the special purpose pools are needed.
+Indeed, the allocation is about the same as the previous cost of
+allocating buffers from the network pool (\fImbuf\fP\^s).
+Consequently applications that used to allocate network
+buffers for their own uses have been switched over to using
+the general purpose allocator without increasing their running time.
+.PP
+Quantifying the performance of the allocator is difficult because
+it is hard to measure the amount of time spent allocating
+and freeing memory in the kernel.
+The usual approach is to compile a kernel for profiling
+and then compare the running time of the routines that
+implemented the old abstraction versus those that implement the new one.
+The old routines are difficult to quantify because
+individual routines were used for more than one purpose.
+For example, the
+.RN geteblk
+routine was used both to allocate one kilobyte memory blocks
+and for its intended purpose of providing buffers to the filesystem.
+Differentiating these uses is often difficult.
+To get a measure of the cost of memory allocation before
+putting in our new allocator,
+we summed up the running time of all the routines whose
+exclusive task was memory allocation.
+To this total we added the fraction
+of the running time of the multi-purpose routines that could
+clearly be identified as memory allocation usage.
+This number showed that approximately three percent of
+the time spent in the kernel could be accounted to memory allocation.
+.PP
+The new allocator is difficult to measure
+because the usual case of the memory allocator is implemented as a macro.
+Thus, its running time is a small fraction of the running time of the
+numerous routines in the kernel that use it.
+To get a bound on the cost,
+we changed the macro always to call the memory allocation routine.
+Running in this mode, the memory allocator accounted for six percent
+of the time spent in the kernel.
+Factoring out the cost of the statistics collection and the
+subroutine call overhead for the cases that could
+normally be handled by the macro,
+we estimate that the allocator would account for
+at most four percent of time in the kernel.
+These measurements show that the new allocator does not introduce
+significant new run-time costs.
+.PP
+The other major success has been in keeping the size information
+on a per-page basis.
+This technique allows the most frequently requested sizes to be
+allocated without waste.
+It also reduces the amount of bookkeeping information associated
+with the allocator to four kilobytes of information
+per megabyte of memory under management (with a one kilobyte page size).
+.H 1 "Future Work
+.PP
+Our next project is to convert many of the static
+kernel tables to be dynamically allocated.
+Static tables include the process table, the file table,
+and the mount table.
+Making these tables dynamic will have two benefits.
+First, it will reduce the amount of memory
+that must be statically allocated at boot time.
+Second, it will eliminate the arbitrary upper limit imposed
+by the current static sizing
+(although a limit will be retained to constrain runaway clients).
+Other researchers have already shown the memory savings
+achieved by this conversion [Rodriguez88].
+.PP
+Under the current implementation,
+memory is never moved from one size list to another.
+With the 4.2BSD memory allocator this causes problems,
+particularly for large allocations where a process may use
+a quarter megabyte piece of memory once,
+which is then never available for any other size request.
+In our hybrid scheme,
+memory can be shuffled between large requests so that large blocks
+of memory are never stranded as they are with the 4.2BSD allocator.
+However, pages allocated to small requests are allocated once
+to a particular size and never changed thereafter.
+If a burst of requests came in for a particular size,
+that size would acquire a large amount of memory
+that would then not be available for other future requests.
+.PP
+In practice, we do not find that the free lists become too large.
+However, we have been investigating ways to handle such problems
+if they occur in the future.
+Our current investigations involve a routine
+that can run as part of the idle loop that would sort the elements
+on each of the free lists into order of increasing address.
+Since any given page has only one size of elements allocated from it,
+the effect of the sorting would be to sort the list into distinct pages.
+When all the pieces of a page became free,
+the page itself could be released back to the free pool so that
+it could be allocated to another purpose.
+Although there is no guarantee that all the pieces of a page would ever
+be freed,
+most allocations are short-lived, lasting only for the duration of
+an open file descriptor, an open network connection, or a system call.
+As new allocations would be made from the page sorted to
+the front of the list,
+return of elements from pages at the back would eventually
+allow pages later in the list to be freed.
+.PP
+Two of the traditional UNIX
+memory allocators remain in the current system.
+The terminal subsystem uses \fIclist\fP\^s (character lists).
+That part of the system is expected to undergo major revision within
+the next year or so, and it will probably be changed to use
+\fImbuf\fP\^s as it is merged into the network system.
+The other major allocator that remains is
+.RN getblk ,
+the routine that manages the filesystem buffer pool memory
+and associated control information.
+Only the filesystem uses
+.RN getblk
+in the current system;
+it manages the constant-sized buffer pool.
+We plan to merge the filesystem buffer cache into the virtual memory system's
+page cache in the future.
+This change will allow the size of the buffer pool to be changed
+according to memory load,
+but will require a policy for balancing memory needs
+with filesystem cache performance.
+.H 1 "Acknowledgments
+.PP
+In the spirit of community support,
+we have made various versions of our allocator available to our test sites.
+They have been busily burning it in and giving
+us feedback on their experiences.
+We acknowledge their invaluable input.
+The feedback from the Usenix program committee on the initial draft of
+our paper suggested numerous important improvements.
+.H 1 "References
+.LP
+.IP Korn85 \w'Rodriguez88\0\0'u
+David Korn, Kiem-Phong Vo,
+``In Search of a Better Malloc''
+\fIProceedings of the Portland Usenix Conference\fP,
+pp 489-506, June 1985.
+.IP McKusick85
+M. McKusick, M. Karels, S. Leffler,
+``Performance Improvements and Functional Enhancements in 4.3BSD''
+\fIProceedings of the Portland Usenix Conference\fP,
+pp 519-531, June 1985.
+.IP Rodriguez88
+Robert Rodriguez, Matt Koehler, Larry Palmer, Ricky Palmer,
+``A Dynamic UNIX Operating System''
+\fIProceedings of the San Francisco Usenix Conference\fP,
+June 1988.
+.IP Thompson78
+Ken Thompson,
+``UNIX Implementation''
+\fIBell System Technical Journal\fP, volume 57, number 6,
+pp 1931-1946, 1978.
diff --git a/share/doc/papers/kernmalloc/spell.ok b/share/doc/papers/kernmalloc/spell.ok
new file mode 100644
index 0000000..10c3ab7
--- /dev/null
+++ b/share/doc/papers/kernmalloc/spell.ok
@@ -0,0 +1,57 @@
+BUCKETINDX
+CLBYTES
+CM
+Karels
+Kiem
+Koehler
+Korn
+Korn85
+MAXALLOCSAVE
+MAXALLOCSIZE
+MAXKMEM
+MINALLOCSIZE
+MINBUCKET
+Matt
+McKusick
+McKusick85
+Mem
+Phong
+Ricky
+Rodriguez88
+S.Leffler
+Thompson78
+ULTRIX
+Usenix
+VAX
+Vo
+arptbl
+caddr
+devbuf
+extern
+fragtbl
+freelist
+geteblk
+indx
+ioctlops
+kb
+kbp
+kmembase
+kmembuckets
+kmemsizes
+ks
+ksp
+mbuf
+mbufs
+namei
+pagecnt
+pathname
+pcb
+pp
+routetbl
+runtime
+splimp
+splx
+superblk
+temp
+wmemall
+zmemall
diff --git a/share/doc/papers/kernmalloc/usage.tbl b/share/doc/papers/kernmalloc/usage.tbl
new file mode 100644
index 0000000..c5ebdfe
--- /dev/null
+++ b/share/doc/papers/kernmalloc/usage.tbl
@@ -0,0 +1,75 @@
+.\" Copyright (c) 1988 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)usage.tbl 5.1 (Berkeley) 4/16/91
+.\"
+.TS
+box;
+c s s s
+c c c c
+n n n n.
+Memory statistics by bucket size
+=
+Size In Use Free Requests
+_
+128 329 39 3129219
+256 0 0 0
+512 4 0 16
+1024 17 5 648771
+2048 13 0 13
+2049\-4096 0 0 157
+4097\-8192 2 0 103
+8193\-16384 0 0 0
+16385\-32768 1 0 1
+.TE
+.DE
+.DS B
+.TS
+box;
+c s s s s
+c c c c c
+c n n n n.
+Memory statistics by type
+=
+Type In Use Mem Use High Use Requests
+_
+mbuf 6 1K 17K 3099066
+devbuf 13 53K 53K 13
+socket 37 5K 6K 1275
+pcb 55 7K 8K 1512
+routetbl 229 29K 29K 2424
+fragtbl 0 0K 1K 404
+zombie 3 1K 1K 24538
+namei 0 0K 5K 648754
+ioctlops 0 0K 1K 12
+superblk 24 34K 34K 24
+temp 0 0K 8K 258
+.TE
diff --git a/share/doc/papers/kerntune/0.t b/share/doc/papers/kerntune/0.t
new file mode 100644
index 0000000..90fa2bf
--- /dev/null
+++ b/share/doc/papers/kerntune/0.t
@@ -0,0 +1,129 @@
+.\" Copyright (c) 1984 M. K. McKusick
+.\" Copyright (c) 1984 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 1.2 (Berkeley) 11/8/90
+.\"
+.EQ
+delim $$
+.EN
+.if n .ND
+.TL
+Using gprof to Tune the 4.2BSD Kernel
+.AU
+Marshall Kirk McKusick
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+This paper describes how the \fIgprof\fP profiler
+accounts for the running time of called routines
+in the running time of the routines that call them.
+It then explains how to configure a profiling kernel on
+the 4.2 Berkeley Software Distribution of
+.UX
+for the VAX\(dd
+.FS
+\(dd VAX is a trademark of Digital Equipment Corporation.
+.FE
+and discusses tradeoffs in techniques for collecting
+profile data.
+\fIGprof\fP identifies problems
+that severely affect the overall performance of the kernel.
+Once a potential problem area is identified,
+benchmark programs are devised to highlight the bottleneck.
+These benchmarks verify that the problem exists and provide
+a metric against which to validate proposed solutions.
+Two caches are added to the kernel to alleviate the bottleneck
+and \fIgprof\fP is used to validate their effectiveness.
+.AE
+.LP
+.de PT
+.lt \\n(LLu
+.pc %
+.nr PN \\n%
+.tl '\\*(LH'\\*(CH'\\*(RH'
+.lt \\n(.lu
+..
+.af PN i
+.ds LH 4.2BSD Performance
+.ds RH Contents
+.bp 1
+.if t .ds CF May 21, 1984
+.if t .ds LF
+.if t .ds RF McKusick
+.ce
+.B "TABLE OF CONTENTS"
+.LP
+.sp 1
+.nf
+.B "1. Introduction"
+.LP
+.sp .5v
+.nf
+.B "2. The \fIgprof\fP Profiler"
+\0.1. Data Presentation
+\0.1.1. The Flat Profile
+\0.1.2. The Call Graph Profile
+\0.2. Profiling the Kernel
+.LP
+.sp .5v
+.nf
+.B "3. Using \fIgprof\fP to Improve Performance
+\0.1. Using the Profiler
+\0.2. An Example of Tuning
+.LP
+.sp .5v
+.nf
+.B "4. Conclusions"
+.LP
+.sp .5v
+.nf
+.B Acknowledgements
+.LP
+.sp .5v
+.nf
+.B References
+.af PN 1
+.bp 1
+.de _d
+.if t .ta .6i 2.1i 2.6i
+.\" 2.94 went to 2.6, 3.64 to 3.30
+.if n .ta .84i 2.6i 3.30i
+..
+.de _f
+.if t .ta .5i 1.25i 2.5i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i
+..
diff --git a/share/doc/papers/kerntune/1.t b/share/doc/papers/kerntune/1.t
new file mode 100644
index 0000000..d78c568
--- /dev/null
+++ b/share/doc/papers/kerntune/1.t
@@ -0,0 +1,48 @@
+.\" Copyright (c) 1984 M. K. McKusick
+.\" Copyright (c) 1984 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 1.2 (Berkeley) 11/8/90
+.\"
+.ds RH Introduction
+.NH 1
+Introduction
+.PP
+The purpose of this paper is to describe the tools and techniques
+that are available for improving the performance of the kernel.
+The primary tool used to measure the kernel is the hierarchical
+profiler \fIgprof\fP.
+The profiler enables the user to measure the cost of
+the abstractions that the kernel provides to the user.
+Once the expensive abstractions are identified,
+optimizations are postulated to help improve their performance.
+These optimizations are each individually
+verified to ensure that they are producing a measurable improvement.
diff --git a/share/doc/papers/kerntune/2.t b/share/doc/papers/kerntune/2.t
new file mode 100644
index 0000000..2857dc2
--- /dev/null
+++ b/share/doc/papers/kerntune/2.t
@@ -0,0 +1,234 @@
+.\" Copyright (c) 1984 M. K. McKusick
+.\" Copyright (c) 1984 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 1.3 (Berkeley) 11/8/90
+.\"
+.ds RH The \fIgprof\fP Profiler
+.NH 1
+The \fIgprof\fP Profiler
+.PP
+The purpose of the \fIgprof\fP profiling tool is to
+help the user evaluate alternative implementations
+of abstractions.
+The \fIgprof\fP design takes advantage of the fact that the kernel,
+though large, is structured and hierarchical.
+We provide a profile in which the execution time
+for a set of routines that implement an
+abstraction is collected and charged
+to that abstraction.
+The profile can be used to compare and assess the costs of
+various implementations [Graham82] [Graham83].
+.NH 2
+Data presentation
+.PP
+The data is presented to the user in two different formats.
+The first presentation simply lists the routines
+without regard to the amount of time their descendants use.
+The second presentation incorporates the call graph of the
+kernel.
+.NH 3
+The Flat Profile
+.PP
+The flat profile consists of a list of all the routines
+that are called during execution of the kernel,
+with the count of the number of times they are called
+and the number of seconds of execution time for which they
+are themselves accountable.
+The routines are listed in decreasing order of execution time.
+A list of the routines that are never called during execution of
+the kernel is also available
+to verify that nothing important is omitted by
+this profiling run.
+The flat profile gives a quick overview of the routines that are used,
+and shows the routines that are themselves responsible
+for large fractions of the execution time.
+In practice,
+this profile usually shows that no single function
+is overwhelmingly responsible for
+the total time of the kernel.
+Notice that for this profile,
+the individual times sum to the total execution time.
+.NH 3
+The Call Graph Profile
+.PP
+Ideally, we would like to print the call graph of the kernel,
+but we are limited by the two-dimensional nature of our output
+devices.
+We cannot assume that a call graph is planar,
+and even if it is, that we can print a planar version of it.
+Instead, we choose to list each routine,
+together with information about
+the routines that are its direct parents and children.
+This listing presents a window into the call graph.
+Based on our experience,
+both parent information and child information
+are important,
+and should be available without searching
+through the output.
+Figure 1 shows a sample \fIgprof\fP entry.
+.KF
+.DS L
+.TS
+box center;
+c c c c c l l
+c c c c c l l
+c c c c c l l
+l n n n c l l.
+ called/total \ \ parents
+index %time self descendants called+self name index
+ called/total \ \ children
+_
+ 0.20 1.20 4/10 \ \ \s-1CALLER1\s+1 [7]
+ 0.30 1.80 6/10 \ \ \s-1CALLER2\s+1 [1]
+[2] 41.5 0.50 3.00 10+4 \s-1EXAMPLE\s+1 [2]
+ 1.50 1.00 20/40 \ \ \s-1SUB1\s+1 <cycle1> [4]
+ 0.00 0.50 1/5 \ \ \s-1SUB2\s+1 [9]
+ 0.00 0.00 0/5 \ \ \s-1SUB3\s+1 [11]
+.TE
+.ce
+Figure 1. Profile entry for \s-1EXAMPLE\s+1.
+.DE
+.KE
+.PP
+The major entries of the call graph profile are the entries from the
+flat profile, augmented by the time propagated to each
+routine from its descendants.
+This profile is sorted by the sum of the time for the routine
+itself plus the time inherited from its descendants.
+The profile shows which of the higher level routines
+spend large portions of the total execution time
+in the routines that they call.
+For each routine, we show the amount of time passed by each child
+to the routine, which includes time for the child itself
+and for the descendants of the child
+(and thus the descendants of the routine).
+We also show the percentage these times represent of the total time
+accounted to the child.
+Similarly, the parents of each routine are listed,
+along with time,
+and percentage of total routine time,
+propagated to each one.
+.PP
+Cycles are handled as single entities.
+The cycle as a whole is shown as though it were a single routine,
+except that members of the cycle are listed in place of the children.
+Although the number of calls of each member
+from within the cycle is shown,
+these calls do not affect time propagation.
+When a child is a member of a cycle,
+the time shown is the appropriate fraction of the time
+for the whole cycle.
+Self-recursive routines have their calls broken
+down into calls from the outside and self-recursive calls.
+Only the outside calls affect the propagation of time.
+.PP
+The example shown in Figure 2 is the fragment of a call graph
+corresponding to the entry in the call graph profile listing
+shown in Figure 1.
+.KF
+.DS L
+.so fig2.pic
+.ce
+Figure 2. Example call graph fragment.
+.DE
+.KE
+.PP
+The entry is for routine \s-1EXAMPLE\s+1, which has
+the Caller routines as its parents,
+and the Sub routines as its children.
+The reader should keep in mind that all information
+is given \fIwith respect to \s-1EXAMPLE\s+1\fP.
+The index in the first column shows that \s-1EXAMPLE\s+1
+is the second entry in the profile listing.
+The \s-1EXAMPLE\s+1 routine is called ten times, four times by \s-1CALLER1\s+1,
+and six times by \s-1CALLER2\s+1.
+Consequently 40% of \s-1EXAMPLE\s+1's time is propagated to \s-1CALLER1\s+1,
+and 60% of \s-1EXAMPLE\s+1's time is propagated to \s-1CALLER2\s+1.
+The self and descendant fields of the parents
+show the amount of self and descendant time \s-1EXAMPLE\s+1
+propagates to them (but not the time used by
+the parents directly).
+Note that \s-1EXAMPLE\s+1 calls itself recursively four times.
+The routine \s-1EXAMPLE\s+1 calls routine \s-1SUB1\s+1 twenty times, \s-1SUB2\s+1 once,
+and never calls \s-1SUB3\s+1.
+Since \s-1SUB2\s+1 is called a total of five times,
+20% of its self and descendant time is propagated to \s-1EXAMPLE\s+1's
+descendant time field.
+Because \s-1SUB1\s+1 is a member of \fIcycle 1\fR,
+the self and descendant times
+and call count fraction
+are those for the cycle as a whole.
+Since cycle 1 is called a total of forty times
+(not counting calls among members of the cycle),
+it propagates 50% of the cycle's self and descendant
+time to \s-1EXAMPLE\s+1's descendant time field.
+Finally each name is followed by an index that shows
+where on the listing to find the entry for that routine.
+.NH 2
+Profiling the Kernel
+.PP
+It is simple to build a 4.2BSD kernel that will automatically
+collect profiling information as it operates simply by specifying the
+.B \-p
+option to \fIconfig\fP\|(8) when configuring a kernel.
+The program counter sampling can be driven by the system clock,
+or by an alternate real time clock.
+The latter is highly recommended as use of the system clock results
+in statistical anomalies in accounting for
+the time spent in the kernel clock routine.
+.PP
+Once a profiling system has been booted, statistics gathering is
+handled by \fIkgmon\fP\|(8).
+\fIKgmon\fP allows profiling to be started and stopped
+and the internal state of the profiling buffers to be dumped.
+\fIKgmon\fP can also be used to reset the state of the internal
+buffers to allow multiple experiments to be run without
+rebooting the machine.
+The profiling data can then be processed with \fIgprof\fP\|(1)
+to obtain information regarding the system's operation.
+.PP
+A profiled system is about 5-10% larger in its text space because of
+the calls to count the subroutine invocations.
+When the system executes,
+the profiling data is stored in a buffer that is 1.2
+times the size of the text space.
+All the information is summarized in memory;
+it is not necessary to have a trace file
+being continuously dumped to disk.
+The overhead for running a profiled system varies;
+under normal load we see anywhere from 5-25%
+of the system time spent in the profiling code.
+Thus the system is noticeably slower than an unprofiled system,
+yet is not so bad that it cannot be used in a production environment.
+This is important since it allows us to gather data
+in a real environment rather than trying to
+devise synthetic work loads.
diff --git a/share/doc/papers/kerntune/3.t b/share/doc/papers/kerntune/3.t
new file mode 100644
index 0000000..e03236b
--- /dev/null
+++ b/share/doc/papers/kerntune/3.t
@@ -0,0 +1,290 @@
+.\" Copyright (c) 1984 M. K. McKusick
+.\" Copyright (c) 1984 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 1.2 (Berkeley) 11/8/90
+.\"
+.ds RH Techniques for Improving Performance
+.NH 1
+Techniques for Improving Performance
+.PP
+This section gives several hints on general optimization techniques.
+It then proceeds with an example of how they can be
+applied to the 4.2BSD kernel to improve its performance.
+.NH 2
+Using the Profiler
+.PP
+The profiler is a useful tool for improving
+a set of routines that implement an abstraction.
+It can be helpful in identifying poorly coded routines,
+and in evaluating the new algorithms and code that replace them.
+Taking full advantage of the profiler
+requires a careful examination of the call graph profile,
+and a thorough knowledge of the abstractions underlying
+the kernel.
+.PP
+The easiest optimization that can be performed
+is a small change
+to a control construct or data structure.
+An obvious starting point
+is to expand a small frequently called routine inline.
+The drawback to inline expansion is that the data abstractions
+in the kernel may become less parameterized,
+hence less clearly defined.
+The profiling will also become less useful since the loss of
+routines will make its output more granular.
+.PP
+Further potential for optimization lies in routines that
+implement data abstractions whose total execution
+time is long.
+If the data abstraction function cannot easily be speeded up,
+it may be advantageous to cache its results,
+and eliminate the need to rerun
+it for identical inputs.
+These and other ideas for program improvement are discussed in
+[Bentley81].
+.PP
+This tool is best used in an iterative approach:
+profiling the kernel,
+eliminating one bottleneck,
+then finding some other part of the kernel
+that begins to dominate execution time.
+.PP
+A completely different use of the profiler is to analyze the control
+flow of an unfamiliar section of the kernel.
+By running an example that exercises the unfamiliar section of the kernel,
+and then using \fIgprof\fR, you can get a view of the
+control structure of the unfamiliar section.
+.NH 2
+An Example of Tuning
+.PP
+The first step is to come up with a method for generating
+profile data.
+We prefer to run a profiling system for about a one day
+period on one of our general timesharing machines.
+While this is not as reproducible as a synthetic workload,
+it certainly represents a realistic test.
+We have run one day profiles on several
+occasions over a three month period.
+Despite the long period of time that elapsed
+between the test runs, the shapes of the profiles,
+as measured by the number of times each system call
+entry point was called, were remarkably similar.
+.PP
+A second alternative is to write a small benchmark
+program to repeatedly exercise a suspected bottleneck.
+While these benchmarks are not useful as a long term profile
+they can give quick feedback on whether a hypothesized
+improvement is really having an effect.
+It is important to realize that the only real assurance
+that a change has a beneficial effect is through
+long term measurements of general timesharing.
+We have numerous examples where a benchmark program
+suggests vast improvements while the change
+in the long term system performance is negligible,
+and conversely examples in which the benchmark program runs more slowly,
+but the long term system performance improves significantly.
+.PP
+An investigation of our long term profiling showed that
+the single most expensive function performed by the kernel
+is path name translation.
+We find that our general time sharing systems do about
+500,000 name translations per day.
+The cost of doing name translation in the original 4.2BSD
+is 24.2 milliseconds,
+representing 40% of the time processing system calls,
+which is 19% of the total cycles in the kernel,
+or 11% of all cycles executed on the machine.
+The times are shown in Figure 3.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part	time	% of kernel
+_
+self	14.3 ms/call	11.3%
+child	9.9 ms/call	7.9%
+_
+total	24.2 ms/call	19.2%
+.TE
+.ce
+Figure 3. Call times for \fInamei\fP.
+.DE
+.KE
+.PP
+The system measurements collected showed the
+pathname translation routine, \fInamei\fP,
+was clearly worth optimizing.
+An inspection of \fInamei\fP shows that
+it consists of two nested loops.
+The outer loop is traversed once per pathname component.
+The inner loop performs a linear search through a directory looking
+for a particular pathname component.
+.PP
+Our first idea was to observe that many programs
+step through a directory performing an operation on
+each entry in turn.
+This caused us to modify \fInamei\fP to cache
+the directory offset of the last pathname
+component looked up by a process.
+The cached offset is then used
+as the point at which a search in the same directory
+begins. Changing directories invalidates the cache, as
+does modifying the directory.
+For programs that step sequentially through a directory with
+$N$ files, search time decreases from $O ( N sup 2 )$
+to $O(N)$.
+.PP
+The cost of the cache is about 20 lines of code
+(about 0.2 kilobytes)
+and 16 bytes per process, with the cached data
+stored in a process's \fIuser\fP vector.
+.PP
+As a quick benchmark to verify the effectiveness of the
+cache we ran ``ls \-l''
+on a directory containing 600 files.
+Before the per-process cache this command
+used 22.3 seconds of system time.
+After adding the cache the program used the same amount
+of user time, but the system time dropped to 3.3 seconds.
+.PP
+This change prompted our rerunning a profiled system
+on a machine containing the new \fInamei\fP.
+The results showed that the time in \fInamei\fP
+dropped by only 2.6 ms/call and
+still accounted for 36% of the system call time,
+18% of the kernel, or about 10% of all the machine cycles.
+This amounted to a drop in system time from 57% to about 55%.
+The results are shown in Figure 4.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part	time	% of kernel
+_
+self	11.0 ms/call	9.2%
+child	10.6 ms/call	8.9%
+_
+total	21.6 ms/call	18.1%
+.TE
+.ce
+Figure 4. Call times for \fInamei\fP with per-process cache.
+.DE
+.KE
+.PP
+The small performance improvement
+was caused by a low cache hit ratio.
+Although the cache was 90% effective when hit,
+it was only usable on about 25% of the names being translated.
+An additional reason for the small improvement was that
+although the amount of time spent in \fInamei\fP itself
+decreased substantially,
+more time was spent in the routines that it called
+since each directory had to be accessed twice;
+once to search from the middle to the end,
+and once to search from the beginning to the middle.
+.PP
+Most missed names were caused by path name components
+other than the last.
+Thus Robert Elz introduced a system wide cache of most recent
+name translations.
+The cache is keyed on a name and the
+inode and device number of the directory that contains it.
+Associated with each entry is a pointer to the corresponding
+entry in the inode table.
+This has the effect of short circuiting the outer loop of \fInamei\fP.
+For each path name component,
+\fInamei\fP first looks in its cache of recent translations
+for the needed name.
+If it exists, the directory search can be completely eliminated.
+If the name is not recognized,
+then the per-process cache may still be useful in
+reducing the directory search time.
+The two caching schemes complement each other well.
+.PP
+The cost of the name cache is about 200 lines of code
+(about 1.2 kilobytes)
+and 44 bytes per cache entry.
+Depending on the size of the system,
+about 200 to 1000 entries will normally be configured,
+using 10-44 kilobytes of physical memory.
+The name cache is resident in memory at all times.
+.PP
+After adding the system wide name cache we reran ``ls \-l''
+on the same directory.
+The user time remained the same;
+however, the system time rose slightly to 3.7 seconds.
+This was not surprising as \fInamei\fP
+now had to maintain the cache,
+but was never able to make any use of it.
+.PP
+Another profiled system was created and measurements
+were collected over a one day period. These measurements
+showed a 6 ms/call decrease in \fInamei\fP, with
+\fInamei\fP accounting for only 31% of the system call time,
+16% of the time in the kernel,
+or about 7% of all the machine cycles.
+System time dropped from 55% to about 49%.
+The results are shown in Figure 5.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part	time	% of kernel
+_
+self	9.5 ms/call	9.6%
+child	6.1 ms/call	6.1%
+_
+total	15.6 ms/call	15.7%
+.TE
+.ce
+Figure 5. Call times for \fInamei\fP with both caches.
+.DE
+.KE
+.PP
+Statistics on the performance of both caches show
+the large performance improvement is
+caused by the high hit ratio.
+On the profiled system a 60% hit rate was observed in
+the system wide cache. This, coupled with the 25%
+hit rate in the per-process offset cache yielded an
+effective cache hit rate of 85%.
+While the system wide cache reduces both the amount of time in
+the routines that \fInamei\fP calls and the time in \fInamei\fP itself
+(since fewer directories need to be accessed or searched),
+it is interesting to note that the actual percentage of system
+time spent in \fInamei\fP itself increases even though the
+actual time per call decreases.
+This is because less total time is being spent in the kernel,
+hence a smaller absolute time becomes a larger total percentage.
diff --git a/share/doc/papers/kerntune/4.t b/share/doc/papers/kerntune/4.t
new file mode 100644
index 0000000..fcd0ad0
--- /dev/null
+++ b/share/doc/papers/kerntune/4.t
@@ -0,0 +1,99 @@
+.\" Copyright (c) 1984 M. K. McKusick
+.\" Copyright (c) 1984 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 1.2 (Berkeley) 11/8/90
+.\"
+.ds RH Conclusions
+.NH 1
+Conclusions
+.PP
+We have created a profiler that aids in the evaluation
+of the kernel.
+For each routine in the kernel,
+the profile shows the extent to which that routine
+helps support various abstractions,
+and how that routine uses other abstractions.
+The profile assesses the cost of routines
+at all levels of the kernel decomposition.
+The profiler is easily used,
+and can be compiled into the kernel.
+It adds only five to thirty percent execution overhead to the kernel
+being profiled,
+produces no additional output while the kernel is running
+and allows the kernel to be measured in its real environment.
+Kernel profiles can be used to identify bottlenecks in performance.
+We have shown how to improve performance
+by caching recently calculated name translations.
+The combined caches added to the name translation process
+reduce the average cost of translating a pathname to an inode by 35%.
+These changes reduce the percentage of time spent running
+in the system by nearly 9%.
+.nr H2 1
+.ds RH Acknowledgements
+.SH
+\s+2Acknowledgements\s0
+.PP
+I would like to thank Robert Elz for sharing his ideas and
+his code for caching system wide names.
+Thanks also to all the users at Berkeley who provided all the
+input to generate the kernel profiles.
+This work was supported by
+the Defense Advanced Research Projects Agency (DoD) under
+Arpa Order No. 4031 monitored by Naval Electronic Systems Command under
+Contract No. N00039-82-C-0235.
+.ds RH References
+.nr H2 1
+.sp 2
+.SH
+\s+2References\s-2
+.LP
+.IP [Bentley81] 20
+Bentley, J. L.,
+``Writing Efficient Code'',
+Department of Computer Science,
+Carnegie-Mellon University,
+Pittsburgh, Pennsylvania,
+CMU-CS-81-116, 1981.
+.IP [Graham82] 20
+Graham, S., Kessler, P., McKusick, M.,
+``gprof: A Call Graph Execution Profiler'',
+Proceedings of the SIGPLAN '82 Symposium on Compiler Construction,
+Volume 17, Number 6, June 1982. pp 120-126
+.IP [Graham83] 20
+Graham, S., Kessler, P., McKusick, M.,
+``An Execution Profiler for Modular Programs'',
+Software - Practice and Experience,
+Volume 13, 1983. pp 671-685
+.IP [Ritchie74] 20
+Ritchie, D. M. and Thompson, K.,
+``The UNIX Time-Sharing System'',
+CACM 17, 7. July 1974. pp 365-375
diff --git a/share/doc/papers/kerntune/Makefile b/share/doc/papers/kerntune/Makefile
new file mode 100644
index 0000000..f1d21cd
--- /dev/null
+++ b/share/doc/papers/kerntune/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 1.5 (Berkeley) 6/8/93
+
+DIR= papers/kerntune
+SRCS= 0.t 1.t 2.t 3.t 4.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${SOELIM} ${SRCS} | ${PIC} | ${TBL} | ${EQN} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/kerntune/fig2.pic b/share/doc/papers/kerntune/fig2.pic
new file mode 100644
index 0000000..6731ca9
--- /dev/null
+++ b/share/doc/papers/kerntune/fig2.pic
@@ -0,0 +1,57 @@
+.\" Copyright (c) 1987 M. K. McKusick
+.\" Copyright (c) 1987 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig2.pic 1.2 (Berkeley) 11/8/90
+.\"
+.PS
+ellipse ht .3i wid .75i "\s-1CALLER1\s+1"
+ellipse ht .3i wid .75i "\s-1CALLER2\s+1" at 1st ellipse + (2i,0i)
+ellipse ht .3i wid .8i "\s-1EXAMPLE\s+1" at 1st ellipse + (1i,-.5i)
+ellipse ht .3i wid .5i "\s-1SUB1\s+1" at 1st ellipse - (0i,1i)
+ellipse ht .3i wid .5i "\s-1SUB2\s+1" at 3rd ellipse - (0i,.5i)
+ellipse ht .3i wid .5i "\s-1SUB3\s+1" at 2nd ellipse - (0i,1i)
+line <- from 1st ellipse up .5i left .5i chop .1875i
+line <- from 1st ellipse up .5i right .5i chop .1875i
+line <- from 2nd ellipse up .5i left .5i chop .1875i
+line <- from 2nd ellipse up .5i right .5i chop .1875i
+arrow from 1st ellipse to 3rd ellipse chop
+arrow from 2nd ellipse to 3rd ellipse chop
+arrow from 3rd ellipse to 4th ellipse chop
+arrow from 3rd ellipse to 5th ellipse chop .15i chop .15i
+arrow from 3rd ellipse to 6th ellipse chop
+arrow from 4th ellipse down .5i left .5i chop .1875i
+arrow from 4th ellipse down .5i right .5i chop .1875i
+arrow from 5th ellipse down .5i left .5i chop .1875i
+arrow from 5th ellipse down .5i right .5i chop .1875i
+arrow from 6th ellipse down .5i left .5i chop .1875i
+arrow from 6th ellipse down .5i right .5i chop .1875i
+.PE
diff --git a/share/doc/papers/memfs/0.t b/share/doc/papers/memfs/0.t
new file mode 100644
index 0000000..d476f17
--- /dev/null
+++ b/share/doc/papers/memfs/0.t
@@ -0,0 +1,86 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 5.1 (Berkeley) 4/16/91
+.\"
+.rm CM
+.nr PO 1.25i
+.ds CH "
+.ds CF "%
+.nr Fn 0 1
+.ds b3 4.3\s-1BSD\s+1
+.de KI
+.ds Lb "Fig. \\n+(Fn
+.KF
+.ce 1
+Figure \\n(Fn - \\$1.
+..
+.de SM
+\\s-1\\$1\\s+1\\$2
+..
+.de NM
+\&\fI\\$1\fP\\$2
+..
+.de RN
+\&\fI\\$1\fP\^(\^)\\$2
+..
+.de PN
+\&\fB\\$1\fP\\$2
+..
+.TL
+A Pageable Memory Based Filesystem
+.AU
+Marshall Kirk McKusick
+.AU
+Michael J. Karels
+.AU
+Keith Bostic
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.sp
+email: mckusick@cs.Berkeley.EDU
+telephone: 415-642-4948
+.AB
+This paper describes the motivations for memory-based filesystems.
+It compares techniques used to implement them and
+describes the drawbacks of using dedicated memory to
+support such filesystems.
+To avoid the drawbacks of using dedicated memory,
+it discusses building a simple memory-based
+filesystem in pageable memory.
+It details the performance characteristics of this filesystem
+and concludes with areas for future work.
+.AE
+.LP
diff --git a/share/doc/papers/memfs/1.t b/share/doc/papers/memfs/1.t
new file mode 100644
index 0000000..a065844
--- /dev/null
+++ b/share/doc/papers/memfs/1.t
@@ -0,0 +1,392 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 5.1 (Berkeley) 4/16/91
+.\"
+.nr PS 11
+.nr VS 13
+.SH
+Introduction
+.PP
+This paper describes the motivation for and implementation of
+a memory-based filesystem.
+Memory-based filesystems have existed for a long time;
+they have generally been marketed as RAM disks or sometimes
+as software packages that use the machine's general purpose memory.
+.[
+white
+.]
+.PP
+A RAM disk is designed to appear like any other disk peripheral
+connected to a machine.
+It is normally interfaced to the processor through the I/O bus
+and is accessed through a device driver similar or sometimes identical
+to the device driver used for a normal magnetic disk.
+The device driver sends requests for blocks of data to the device
+and the requested data is then DMA'ed to or from the requested block.
+Instead of storing its data on a rotating magnetic disk,
+the RAM disk stores its data in a large array of random access memory
+or bubble memory.
+Thus, the latency of accessing the RAM disk is nearly zero
+compared to the 15-50 milliseconds of latency incurred when
+accessing rotating magnetic media.
+RAM disks also have the benefit of being able to transfer data at
+the maximum DMA rate of the system,
+while disks are typically limited by the rate that the data passes
+under the disk head.
+.PP
+Software packages simulating RAM disks operate by allocating
+a fixed partition of the system memory.
+The software then provides a device driver interface similar
+to the one described for hardware RAM disks,
+except that it uses memory-to-memory copy instead of DMA to move
+the data between the RAM disk and the system buffers,
+or it maps the contents of the RAM disk into the system buffers.
+Because the memory used by the RAM disk is not available for
+other purposes, software RAM-disk solutions are used primarily
+for machines with limited addressing capabilities such as PC's
+that do not have an effective way of using the extra memory anyway.
+.PP
+Most software RAM disks lose their contents when the system is powered
+down or rebooted.
+The contents can be saved by using battery backed-up memory,
+by storing critical filesystem data structures in the filesystem,
+and by running a consistency check program after each reboot.
+These conditions increase the hardware cost
+and potentially slow down the speed of the disk.
+Thus, RAM-disk filesystems are not typically
+designed to survive power failures;
+because of their volatility, their usefulness is limited to transient
+or easily recreated information such as might be found in
+.PN /tmp .
+Their primary benefit is that they have higher throughput
+than disk based filesystems.
+.[
+smith
+.]
+This improved throughput is particularly useful for utilities that
+make heavy use of temporary files, such as compilers.
+On fast processors, nearly half of the elapsed time for a compilation
+is spent waiting for synchronous operations required for file
+creation and deletion.
+The use of the memory-based filesystem nearly eliminates this waiting time.
+.PP
+Using dedicated memory to exclusively support a RAM disk
+is a poor use of resources.
+The overall throughput of the system can be improved
+by using the memory where it is getting the highest access rate.
+These needs may shift between supporting process virtual address spaces
+and caching frequently used disk blocks.
+If the memory is dedicated to the filesystem,
+it is better used in a buffer cache.
+The buffer cache permits faster access to the data
+because it requires only a single memory-to-memory copy
+from the kernel to the user process.
+The use of memory in a RAM-disk configuration may require two
+memory-to-memory copies, one from the RAM disk
+to the buffer cache,
+then another copy from the buffer cache to the user process.
+.PP
+The new work being presented in this paper is building a prototype
+RAM-disk filesystem in pageable memory instead of dedicated memory.
+The goal is to provide the speed benefits of a RAM disk
+without paying the performance penalty inherent in dedicating
+part of the physical memory on the machine to the RAM disk.
+By building the filesystem in pageable memory,
+it competes with other processes for the available memory.
+When memory runs short, the paging system pushes its
+least-recently-used pages to backing store.
+Being pageable also allows the filesystem to be much larger than
+would be practical if it were limited by the amount of physical
+memory that could be dedicated to that purpose.
+We typically operate our
+.PN /tmp
+with 30 to 60 megabytes of space
+which is larger than the amount of memory on the machine.
+This configuration allows small files to be accessed quickly,
+while still allowing
+.PN /tmp
+to be used for big files,
+although at a speed more typical of normal, disk-based filesystems.
+.PP
+An alternative to building a memory-based filesystem would be to have
+a filesystem that never did operations synchronously and never
+flushed its dirty buffers to disk.
+However, we believe that such a filesystem would either
+use a disproportionately large percentage of the buffer
+cache space, to the detriment of other filesystems,
+or would require the paging system to flush its dirty pages.
+Waiting for other filesystems to push dirty pages
+subjects them to delays while waiting for the pages to be written.
+We await the results of others trying this approach.
+.[
+Ohta
+.]
+.SH
+Implementation
+.PP
+The current implementation took less time to write than did this paper.
+It consists of 560 lines of kernel code (1.7K text + data)
+and some minor modifications to the program that builds
+disk based filesystems, \fInewfs\fP.
+A condensed version of the kernel code for the
+memory-based filesystem is reproduced in Appendix A.
+.PP
+A filesystem is created by invoking the modified \fInewfs\fP, with
+an option telling it to create a memory-based filesystem.
+It allocates a section of virtual address space of the requested
+size and builds a filesystem in the memory
+instead of on a disk partition.
+When built, it does a \fImount\fP system call specifying a filesystem type of
+.SM MFS
+(Memory File System).
+The auxiliary data parameter to the mount call specifies a pointer
+to the base of the memory in which it has built the filesystem.
+(The auxiliary data parameter used by the local filesystem, \fIufs\fP,
+specifies the block device containing the filesystem.)
+.PP
+The mount system call allocates and initializes a mount table
+entry and then calls the filesystem-specific mount routine.
+The filesystem-specific routine is responsible for doing
+the mount and initializing the filesystem-specific
+portion of the mount table entry.
+The memory-based filesystem-specific mount routine,
+.RN mfs_mount ,
+is shown in Appendix A.
+It allocates a block-device vnode to represent the memory disk device.
+In the private area of this vnode it stores the base address of
+the filesystem and the process identifier of the \fInewfs\fP process
+for later reference when doing I/O.
+It also initializes an I/O list that it
+uses to record outstanding I/O requests.
+It can then call the \fIufs\fP filesystem mount routine,
+passing the special block-device vnode that it has created
+instead of the usual disk block-device vnode.
+The mount proceeds just as any other local mount, except that
+requests to read from the block device are vectored through
+.RN mfs_strategy
+(described below) instead of the usual
+.RN spec_strategy
+block device I/O function.
+When the mount is completed,
+.RN mfs_mount
+does not return as most other filesystem mount functions do;
+instead it sleeps in the kernel awaiting I/O requests.
+Each time an I/O request is posted for the filesystem,
+a wakeup is issued for the corresponding \fInewfs\fP process.
+When awakened, the process checks for requests on its buffer list.
+A read request is serviced by copying data from the section of the
+\fInewfs\fP address space corresponding to the requested disk block
+to the kernel buffer.
+Similarly a write request is serviced by copying data to the section of the
+\fInewfs\fP address space corresponding to the requested disk block
+from the kernel buffer.
+When all the requests have been serviced, the \fInewfs\fP
+process returns to sleep to await more requests.
+.PP
+Once mounted,
+all operations on files in the memory-based filesystem are handled
+by the \fIufs\fP filesystem code until they get to the point where the
+filesystem needs to do I/O on the device.
+Here, the filesystem encounters the second piece of the
+memory-based filesystem.
+Instead of calling the special-device strategy routine,
+it calls the memory-based strategy routine,
+.RN mfs_strategy .
+Usually,
+the request is serviced by linking the buffer onto the
+I/O list for the memory-based filesystem
+vnode and sending a wakeup to the \fInewfs\fP process.
+This wakeup results in a context-switch to the \fInewfs\fP
+process, which does a copyin or copyout as described above.
+The strategy routine must be careful to check whether
+the I/O request is coming from the \fInewfs\fP process itself, however.
+Such requests happen during mount and unmount operations,
+when the kernel is reading and writing the superblock.
+Here,
+.RN mfs_strategy
+must do the I/O itself to avoid deadlock.
+.PP
+The final piece of kernel code to support the
+memory-based filesystem is the close routine.
+After the filesystem has been successfully unmounted,
+the device close routine is called.
+For a memory-based filesystem, the device close routine is
+.RN mfs_close .
+This routine flushes any pending I/O requests,
+then sets the I/O list head to a special value
+that is recognized by the I/O servicing loop in
+.RN mfs_mount
+as an indication that the filesystem is unmounted.
+The
+.RN mfs_mount
+routine exits, in turn causing the \fInewfs\fP process
+to exit, resulting in the filesystem vanishing in a cloud of dirty pages.
+.PP
+The paging of the filesystem does not require any additional
+code beyond that already in the kernel to support virtual memory.
+The \fInewfs\fP process competes with other processes on an equal basis
+for the machine's available memory.
+Data pages of the filesystem that have not yet been used
+are zero-fill-on-demand pages that do not occupy memory,
+although they currently allocate space in backing store.
+As long as memory is plentiful, the entire contents of the filesystem
+remain memory resident.
+When memory runs short, the oldest pages of \fInewfs\fP will be
+pushed to backing store as part of the normal paging activity.
+The pages that are pushed usually hold the contents of
+files that have been created in the memory-based filesystem
+but have not been recently accessed (or have been deleted).
+.[
+leffler
+.]
+.SH
+Performance
+.PP
+The performance of the current memory-based filesystem is determined by
+the memory-to-memory copy speed of the processor.
+Empirically we find that the throughput is about 45% of this
+memory-to-memory copy speed.
+The basic set of steps for each block written is:
+.IP 1)
+memory-to-memory copy from the user process doing the write to a kernel buffer
+.IP 2)
+context-switch to the \fInewfs\fP process
+.IP 3)
+memory-to-memory copy from the kernel buffer to the \fInewfs\fP address space
+.IP 4)
+context switch back to the writing process
+.LP
+Thus each write requires at least two memory-to-memory copies
+accounting for about 90% of the
+.SM CPU
+time.
+The remaining 10% is consumed in the context switches and
+the filesystem allocation and block location code.
+The actual context switch count is really only about half
+of the worst case outlined above because
+read-ahead and write-behind allow multiple blocks
+to be handled with each context switch.
+.PP
+On the six-\c
+.SM "MIPS CCI"
+Power 6/32 machine,
+the raw reading and writing speed is only about twice that of
+a regular disk-based filesystem.
+However, for processes that create and delete many files,
+the speedup is considerably greater.
+The reason for the speedup is that the filesystem
+must do two synchronous operations to create a file,
+first writing the allocated inode to disk, then creating the
+directory entry.
+Deleting a file similarly requires at least two synchronous
+operations.
+Here, the low latency of the memory-based filesystem is
+noticeable compared to the disk-based filesystem,
+as a synchronous operation can be done with
+just two context switches instead of incurring the disk latency.
+.SH
+Future Work
+.PP
+The most obvious shortcoming of the current implementation
+is that filesystem blocks are copied twice, once between the \fInewfs\fP
+process' address space and the kernel buffer cache,
+and once between the kernel buffer and the requesting process.
+These copies are done in different process contexts, necessitating
+two context switches per group of I/O requests.
+These problems arise because of the current inability of the kernel
+to do page-in operations
+for an address space other than that of the currently-running process,
+and the current inconvenience of mapping process-owned pages into the kernel
+buffer cache.
+Both of these problems are expected to be solved in the next version
+of the virtual memory system,
+and thus we chose not to address them in the current implementation.
+With the new version of the virtual memory system, we expect to use
+any part of physical memory as part of the buffer cache,
+even though it will not be entirely addressable at once within the kernel.
+In that system, the implementation of a memory-based filesystem
+that avoids the double copy and context switches will be much easier.
+.PP
+Ideally part of the kernel's address space would reside in pageable memory.
+Once such a facility is available it would be most efficient to
+build a memory-based filesystem within the kernel.
+One potential problem with such a scheme is that many kernels
+are limited to a small address space (usually a few megabytes).
+This restriction limits the size of memory-based
+filesystem that such a machine can support.
+On such a machine, the kernel can describe a memory-based filesystem
+that is larger than its address space and use a ``window''
+to map the larger filesystem address space into its limited address space.
+The window would maintain a cache of recently accessed pages.
+The problem with this scheme is that if the working set of
+active pages is greater than the size of the window, then
+much time is spent remapping pages and invalidating
+translation buffers.
+Alternatively, a separate address space could be constructed for each
+memory-based filesystem as in the current implementation,
+and the memory-resident pages of that address space could be mapped
+exactly as other cached pages are accessed.
+.PP
+The current system uses the existing local filesystem structures
+and code to implement the memory-based filesystem.
+The major advantages of this approach are the sharing of code
+and the simplicity of the approach.
+There are several disadvantages, however.
+One is that the size of the filesystem is fixed at mount time.
+This means that a fixed number of inodes (files) and data blocks
+can be supported.
+Currently, this approach requires enough swap space for the entire
+filesystem, and prevents expansion and contraction of the filesystem on demand.
+The current design also prevents the filesystem from taking advantage
+of the memory-resident character of the filesystem.
+It would be interesting to explore other filesystem implementations
+that would be less expensive to execute and that would make better
+use of the space.
+For example, the current filesystem structure is optimized for magnetic
+disks.
+It includes replicated control structures, ``cylinder groups''
+with separate allocation maps and control structures,
+and data structures that optimize rotational layout of files.
+None of this is useful in a memory-based filesystem (at least when the
+backing store for the filesystem is dynamically allocated and not
+contiguous on a single disk type).
+On the other hand,
+directories could be implemented using dynamically-allocated
+memory organized as linked lists or trees rather than as files stored
+in ``disk'' blocks.
+Allocation and location of pages for file data might use virtual memory
+primitives and data structures rather than direct and indirect blocks.
+A reimplementation along these lines will be considered when the virtual
+memory system in the current system has been replaced.
+.[
+$LIST$
+.]
diff --git a/share/doc/papers/memfs/A.t b/share/doc/papers/memfs/A.t
new file mode 100644
index 0000000..c1938c8
--- /dev/null
+++ b/share/doc/papers/memfs/A.t
@@ -0,0 +1,173 @@
+.\" Copyright (c) 1990 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)A.t 5.1 (Berkeley) 4/16/91
+.\"
+.bp
+.nr PS 10
+.nr VS 12
+.SH
+Appendix A - Implementation Details
+.LP
+.nf
+.vS
+/*
+ * This structure defines the control data for the memory
+ * based file system.
+ * It hangs off the block-device vnode's v_data field (see VTOMFS);
+ * mfs_mount() fills in the base, size, pid and request list.
+ */
+struct mfsnode {
+ struct vnode *mfs_vnode; /* vnode associated with this mfsnode */
+ caddr_t mfs_baseoff; /* base of file system in memory (address in the supporting process) */
+ long mfs_size; /* size of memory file system */
+ pid_t mfs_pid; /* supporting process pid; compared against the caller in mfs_strategy() */
+ struct buf *mfs_buflist; /* list of I/O requests linked through av_forw; MFS_EXIT after close */
+};
+
+/*
+ * Convert between mfsnode pointers and vnode pointers
+ */
+#define VTOMFS(vp) ((struct mfsnode *)(vp)->v_data)
+#define MFSTOV(mfsp) ((mfsp)->mfs_vnode)
+/*
+ * Sentinel stored in mfs_buflist by mfs_close() to tell the service
+ * loop in mfs_mount() to exit. Parenthesized so that the expansion
+ * stays a single operand wherever it is used.
+ */
+#define MFS_EXIT ((struct buf *)-1)
+
+/*
+ * Arguments to mount MFS
+ * mfs_mount() copies this structure in from the address passed as the
+ * mount call's data argument.
+ * NOTE(review): size is u_long here but is stored into the signed
+ * long mfs_size of struct mfsnode -- confirm the mismatch is harmless.
+ */
+struct mfs_args {
+ char *name; /* name to export for statfs */
+ caddr_t base; /* base address of file system in memory */
+ u_long size; /* size of file system */
+};
+.bp
+/*
+ * Mount an MFS filesystem.
+ *
+ * mp is the mount table entry being filled in and data is the
+ * user-space address of a struct mfs_args; path is not used in
+ * this condensed listing.
+ * After the mount has been done this routine stays in the kernel,
+ * servicing the requests that mfs_strategy() queues on mfs_buflist,
+ * until mfs_close() sets the list head to MFS_EXIT.
+ */
+mfs_mount(mp, path, data)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+{
+ struct vnode *devvp;
+ struct mfsnode *mfsp;
+ struct buf *bp;
+ struct mfs_args args;
+ caddr_t offset; /* address of the requested block in the supporting process */
+
+ /*
+ * Create a block device to represent the disk.
+ */
+ devvp = getnewvnode(VT_MFS, VBLK, &mfs_vnodeops);
+ mfsp = VTOMFS(devvp);
+ /*
+ * Save base address of the filesystem from the supporting process.
+ * (The size must name the structure type; mfs_args alone is only
+ * a structure tag.)
+ */
+ copyin(data, &args, sizeof (struct mfs_args));
+ mfsp->mfs_baseoff = args.base;
+ mfsp->mfs_size = args.size;
+ /*
+ * Record the process identifier of the supporting process.
+ */
+ mfsp->mfs_pid = u.u_procp->p_pid;
+ /*
+ * Mount the filesystem.
+ */
+ mfsp->mfs_buflist = NULL;
+ mountfs(devvp, mp);
+ /*
+ * Loop processing I/O requests.
+ * Each pass drains the list, then sleeps on the device vnode
+ * until mfs_strategy() or mfs_close() issues a wakeup.
+ */
+ while (mfsp->mfs_buflist != MFS_EXIT) {
+ while (mfsp->mfs_buflist != NULL) {
+ bp = mfsp->mfs_buflist;
+ mfsp->mfs_buflist = bp->av_forw;
+ offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
+ if (bp->b_flags & B_READ)
+ copyin(offset, bp->b_un.b_addr, bp->b_bcount);
+ else /* write_request */
+ copyout(bp->b_un.b_addr, offset, bp->b_bcount);
+ biodone(bp);
+ }
+ sleep((caddr_t)devvp, PWAIT);
+ }
+}
+.bp
+/*
+ * If the MFS process requests the I/O then we must do it directly.
+ * Otherwise put the request on the list and request the MFS process
+ * to be run.
+ *
+ * bp describes the transfer: b_vp is the memory-disk device vnode,
+ * b_blkno the block number in DEV_BSIZE units, b_un.b_addr the kernel
+ * buffer and b_bcount the byte count.
+ */
+mfs_strategy(bp)
+ struct buf *bp;
+{
+ struct vnode *devvp;
+ struct mfsnode *mfsp;
+ caddr_t offset; /* an address in the supporting process, not a file offset */
+
+ devvp = bp->b_vp;
+ mfsp = VTOMFS(devvp);
+ if (mfsp->mfs_pid == u.u_procp->p_pid) {
+ /*
+ * The caller is the supporting process itself, so its
+ * address space is current and the copy is done here.
+ */
+ offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
+ if (bp->b_flags & B_READ)
+ copyin(offset, bp->b_un.b_addr, bp->b_bcount);
+ else /* write_request */
+ copyout(bp->b_un.b_addr, offset, bp->b_bcount);
+ biodone(bp);
+ } else {
+ /*
+ * Push the request on the head of the list and wake the
+ * supporting process, which sleeps on the device vnode.
+ */
+ bp->av_forw = mfsp->mfs_buflist;
+ mfsp->mfs_buflist = bp;
+ wakeup((caddr_t)devvp);
+ }
+}
+
+/*
+ * The close routine is called by unmount after the filesystem
+ * has been successfully unmounted.
+ *
+ * devvp is the memory-disk device vnode. Requests still on the I/O
+ * list are completed, then the list head is set to MFS_EXIT and the
+ * supporting process, which sleeps on devvp in mfs_mount(), is
+ * awakened so that it leaves its service loop.
+ */
+mfs_close(devvp)
+ struct vnode *devvp;
+{
+ struct mfsnode *mfsp = VTOMFS(devvp);
+ struct buf *bp;
+
+ /*
+ * Finish any pending I/O requests.
+ * (mfs_doio() is not reproduced in this condensed listing;
+ * presumably it performs the copy for a single request.)
+ */
+ while (bp = mfsp->mfs_buflist) {
+ mfsp->mfs_buflist = bp->av_forw;
+ mfs_doio(bp, mfsp->mfs_baseoff);
+ wakeup((caddr_t)bp);
+ }
+ /*
+ * Send a request to the filesystem server to exit.
+ * The wakeup channel must be the device vnode, the same
+ * address that mfs_mount() sleeps on.
+ */
+ mfsp->mfs_buflist = MFS_EXIT;
+ wakeup((caddr_t)devvp);
+}
+.vE
diff --git a/share/doc/papers/memfs/Makefile b/share/doc/papers/memfs/Makefile
new file mode 100644
index 0000000..3e67998
--- /dev/null
+++ b/share/doc/papers/memfs/Makefile
@@ -0,0 +1,22 @@
+# @(#)Makefile 1.8 (Berkeley) 6/8/93
+
+# Paper sources. A.t (the appendix listing) is not in SRCS; it reaches
+# the paper through the A.gt rule below.
+DIR= papers/memfs
+SRCS= 0.t 1.t
+MACROS= -ms
+REFER= refer -n -e -l -s -p ref.bib
+EXTRA= ref.bib A.t tmac.srefs
+CLEANFILES=ref.bib.i A.gt paper.t
+
+# Format the refer output together with the local citation macros.
+# ROFF, INDXBIB and GRIND are not set in this file; presumably
+# bsd.doc.mk supplies them.
+paper.ps: paper.t
+ ${ROFF} tmac.srefs paper.t > ${.TARGET}
+
+# Run refer over the text sources and the processed appendix.
+paper.t: ${SRCS} ref.bib.i A.gt
+ ${REFER} ${SRCS} A.gt > ${.TARGET}
+
+# Presumably builds the bibliography index that refer consults.
+ref.bib.i: ref.bib
+ ${INDXBIB} ref.bib
+
+# Pass the appendix listing through ${GRIND} before it joins the paper.
+A.gt: A.t
+ ${GRIND} < A.t > A.gt
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/memfs/ref.bib b/share/doc/papers/memfs/ref.bib
new file mode 100644
index 0000000..89ae507
--- /dev/null
+++ b/share/doc/papers/memfs/ref.bib
@@ -0,0 +1,49 @@
+%A M. K. McKusick
+%A J. M. Bloom
+%A M. J. Karels
+%T Bug Fixes and Changes in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 12:1\-22
+%D 1986
+
+%A M. J. Karels
+%T Changes to the Kernel in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 13:1\-32
+%D 1986
+
+%A S. J. Leffler
+%A M. K. McKusick
+%A M. J. Karels
+%A J. S. Quarterman
+%T The Design and Implementation of the 4.3BSD UNIX Operating System
+%I Addison-Wesley
+%C Reading, MA
+%D 1989
+
+%A R. M. White
+%T Disk Storage Technology
+%J Scientific American
+%V 243
+%N 2
+%P 138\-148
+%D August 1980
+
+%A A. J. Smith
+%T Bibliography on file and I/O system optimizations and related topics
+%J Operating Systems Review
+%V 14
+%N 4
+%P 39\-54
+%D October 1981
+
+%A Masataka Ohta
+%A Hiroshi Tezuka
+%T A Fast /tmp File System by Async Mount Option
+%J \s-1USENIX\s0 Association Conference Proceedings
+%P ???\-???
+%D June 1990
diff --git a/share/doc/papers/memfs/spell.ok b/share/doc/papers/memfs/spell.ok
new file mode 100644
index 0000000..7aa465f
--- /dev/null
+++ b/share/doc/papers/memfs/spell.ok
@@ -0,0 +1,18 @@
+Berkeley.EDU
+Bostic
+CH
+CM
+Fn
+Karels
+Lb
+MFS
+McKusick
+Pageable
+copyin
+copyout
+email
+filesystem
+filesystems
+mckusick
+pageable
+tmp
diff --git a/share/doc/papers/memfs/tmac.srefs b/share/doc/papers/memfs/tmac.srefs
new file mode 100644
index 0000000..6245118
--- /dev/null
+++ b/share/doc/papers/memfs/tmac.srefs
@@ -0,0 +1,177 @@
+.\" @(#)tmac.srefs 1.14 11/2/88
+.\" REFER macros .... citations
+.de []
+.][ \\$1
+..
+.de ][
+.if \\$1>5 .tm Bad arg to []
+.[\\$1
+..
+.if n .ds [. [
+.\".if t .ds [. \s-2\v'-.4m'\f1
+.if t .ds [. [
+.if n .ds .] ]
+.\".if t .ds .] \v'.4m'\s+2\fP
+.if t .ds .] ]
+.ds (. \& [
+.ds .) ]
+.if n .ds [o ""
+.if n .ds [c ""
+.if t .ds [o ``
+.if t .ds [c ''
+.ds [e \\fIet al.\\fP
+.\" for author list in reference:
+.ds &1 &
+.\" for -m signal (auth1 and auth2, year):
+.ds &2 &
+.\" the next lines deal with the problem of .[1] or [1].
+.\" refer will write "linexxx\*(<.[1]\*(>.
+.\" and either "<." or ">." should produce the .;
+.\" similarly for , and ;
+.rm <. <, <;
+.if n .ds >. .
+.if t .ds >. .
+.if n .ds >, ,
+.if t .ds >, ,
+.if n .ds >; ;
+.if t .ds >; ;
+.de [5 \" tm style
+.FS
+.IP "\\*([F.\0"
+\\*([A, \\f2\\*([T\\f1,
+.ie \\n(TN \\*([M.
+.el Bell Laboratories internal memorandum (\\*([D).
+.RT
+.FE
+..
+.de [0 \" other
+.FS
+.nr [: 0
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \{.nr [: 1
+\\*([A\c\}
+.if !"\\*([T"" \{.if \\n([:>0 ,
+.nr [: 1
+\\f2\\*([T\\f1\c\}
+.if !"\\*([O""\{.if \\n([:>0 ,
+.nr [: 1
+.if \\n([O>0 .nr [: 0
+\\*([O\c
+.if \\n([O>0 \& \c\}
+.ie !"\\*([D"" \{.if \\n([:>0 ,
+.nr [: 1
+\\*([D\c\}
+.if \\n([:>0 \&.
+.RT
+.FE
+..
+.de [1 \" journal article
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+\\f2\\*([J\\f1\c
+.if !"\\*([V"" .if n \& Vol.\&\c
+.if !"\\*([V"" \& \\f3\\*([V\\f1\c
+.if !"\\*([N"" (\\*([N)\c
+.if !"\\*([P"" \{\
+.ie \\n([P>0 , pp. \c
+.el , p. \c
+\\*([P\c\}
+.if !"\\*([I"" .if "\\*([R"" , \\*([I\c
+.if !"\\*([O"" .if \\n([O=0 , \\*([O\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" .if \\n([O>0 \\*([O
+.RT
+.FE
+..
+.de [2 \" book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\f2\\*([T,\\f1
+\\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([G"" Gov't. ordering no. \\*([G.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [4 \" report
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+\\*([A, \\*([o\\*([T,\\*([c
+\\*([R\c
+.if !"\\*([G"" \& (\\*([G)\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [3 \" article in book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+.if !"\\*([P"" pp. \\*([P
+in \\f2\\*([B\\f1\c
+.if !"\\*([E"" , ed. \\*([E\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de ]<
+.[<
+..
+.de [<
+.RT
+.ne 62p
+.ie \\n(rS \{\
+. rs
+. sp 4p
+.\}
+.el .sp 27p
+.Li 2 30.5P
+\fBReferences\fP
+.br
+.if \\n(Ns<2 \{\
+. nr Ns 1
+. ds ST References
+.\}
+.\"nr Tt 7
+.sp 8p
+.rm FS FE
+.\"sy echo '.T3 "\\\\t\\\\tReferences" \\n%' >>Toc
+.ns
+..
+.de [>
+.]>
+..
+.de ]>
+.sp
+..
+.de ]-
+.[-
+..
+.\" [- : remove the per-reference field strings [V [P [A [T [N [C [B [O
+.\" [R [I [E [D, presumably so that a field of one reference cannot leak
+.\" into the next.
+.\" NOTE(review): [J, [F, [G and [M are used by the macros above but are
+.\" not removed here -- confirm that is intended.
+.de [-
+.rm [V [P [A [T
+.rm [N [C [B [O
+.rm [R [I [E [D
+..
+.de ]]
+this is never
+executed
+and just
+uses up an end-of-file
+bug.
+..
diff --git a/share/doc/papers/newvm/0.t b/share/doc/papers/newvm/0.t
new file mode 100644
index 0000000..e23a95d
--- /dev/null
+++ b/share/doc/papers/newvm/0.t
@@ -0,0 +1,86 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 5.1 (Berkeley) 4/16/91
+.\"
+.rm CM
+.TL
+A New Virtual Memory Implementation for Berkeley
+.UX
+.AU
+Marshall Kirk McKusick
+Michael J. Karels
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+With the cost per byte of memory approaching the cost per byte
+of disks, and with file systems increasingly distant from the host
+machines, a new approach to the implementation of virtual memory is
+necessary. Rather than preallocating swap space which limits the
+maximum virtual memory that can be supported to the size of the swap
+area, the system should support virtual memory up to the sum of the
+sizes of physical memory plus swap space. For systems with a local swap
+disk, but remote file systems, it may be useful to use some of the memory
+to keep track of the contents of the swap space to avoid multiple fetches
+of the same data from the file system.
+.PP
+The new implementation should also add new functionality. Processes
+should be allowed to have large sparse address spaces, to map files
+into their address spaces, to map device memory into their address
+spaces, and to share memory with other processes. The shared address
+space may either be obtained by mapping a file into (possibly
+different) parts of their address space, or by arranging to share
+``anonymous memory'' (that is, memory that is zero fill on demand, and
+whose contents are lost when the last process unmaps the memory) with
+another process as is done in System V.
+.PP
+One use of shared memory is to provide a high-speed
+Inter-Process Communication (IPC) mechanism between two or more
+cooperating processes. To ensure the integrity of data structures
+in a shared region, processes must be able to use semaphores to
+coordinate their access to these shared structures. In System V,
+these semaphores are provided as a set of system calls. Unfortunately,
+the use of system calls reduces the throughput of the shared memory
+IPC to that of existing IPC mechanisms. We are proposing a scheme
+that places the semaphores in the shared memory segment, so that
+machines that have a test-and-set instruction can handle the usual
+uncontested lock and unlock without doing a system call. Only in
+the unusual case of trying to lock an already-locked lock or in
+releasing a wanted lock will a system call be required. The
+interface will allow a user-level implementation of the System V
+semaphore interface on most machines with a much lower runtime cost.
+.AE
+.LP
+.bp
diff --git a/share/doc/papers/newvm/1.t b/share/doc/papers/newvm/1.t
new file mode 100644
index 0000000..657fc2d
--- /dev/null
+++ b/share/doc/papers/newvm/1.t
@@ -0,0 +1,377 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 5.1 (Berkeley) 4/16/91
+.\"
+.NH
+Motivations for a New Virtual Memory System
+.PP
+The virtual memory system distributed with Berkeley UNIX has served
+its design goals admirably well over the ten years of its existence.
+However the relentless advance of technology has begun to render it
+obsolete.
+This section of the paper describes the current design,
+points out the current technological trends,
+and attempts to define the new design considerations that should
+be taken into account in a new virtual memory design.
+.SH
+Implementation of 4.3BSD virtual memory
+.PP
+All Berkeley Software Distributions through 4.3BSD
+have used the same virtual memory design.
+All processes, whether active or sleeping, have some amount of
+virtual address space associated with them.
+This virtual address space
+is the combination of the amount of address space with which they initially
+started plus any stack or heap expansions that they have made.
+All requests for address space are allocated from available swap space
+at the time that they are first made;
+if there is insufficient swap space left to honor the allocation,
+the system call requesting the address space fails synchronously.
+Thus, the limit to available virtual memory is established by the
+amount of swap space allocated to the system.
+.PP
+Memory pages are used in a sort of shell game to contain the
+contents of recently accessed locations.
+As a process first references a location
+a new page is allocated and filled either with initialized data or
+zeros (for new stack and break pages).
+As the supply of free pages begins to run out, dirty pages are
+pushed to the previously allocated swap space so that they can be reused
+to contain newly faulted pages.
+If a previously accessed page that has been pushed to swap is once
+again used, a free page is reallocated and filled from the swap area
+[Babaoglu79], [Someren84].
+.SH
+Design assumptions for 4.3BSD virtual memory
+.PP
+The design criteria for the current virtual memory implementation
+were made in 1979.
+At that time the cost of memory was about a thousand times greater per
+byte than magnetic disks.
+Most machines were used as centralized time sharing machines.
+These machines had far more disk storage than they had memory
+and given the cost tradeoff between memory and disk storage,
+wanted to make maximal use of the memory even at the cost of
+wasting some of the disk space or generating extra disk I/O.
+.PP
+The primary motivation for virtual memory was to allow the
+system to run individual programs whose address space exceeded
+the memory capacity of the machine.
+Thus the virtual memory capability allowed programs to be run that
+could not have been run on a swap based system.
+Equally important in the large central timesharing environment
+was the ability to allow the sum of the memory requirements of
+all active processes to exceed the amount of physical memory on
+the machine.
+The expected mode of operation for which the system was tuned
+was to have the sum of active virtual memory be one and a half
+to two times the physical memory on the machine.
+.PP
+At the time that the virtual memory system was designed,
+most machines ran with little or no networking.
+All the file systems were contained on disks that were
+directly connected to the machine.
+Similarly all the disk space devoted to swap space was also
+directly connected.
+Thus the speed and latency with which file systems could be accessed
+were roughly equivalent to the speed and latency with which swap
+space could be accessed.
+Given the high cost of memory there was little incentive to have
+the kernel keep track of the contents of the swap area once a process
+exited since it could almost as easily and quickly be reread from the
+file system.
+.SH
+New influences
+.PP
+In the ten years since the current virtual memory system was designed,
+many technological advances have occurred.
+One effect of the technological revolution is that the
+micro-processor has become powerful enough to allow users to have their
+own personal workstations.
+Thus the computing environment is moving away from a purely centralized
+time sharing model to an environment in which users have a
+computer on their desk.
+This workstation is linked through a network to a centralized
+pool of machines that provide filing, computing, and spooling services.
+The workstations tend to have a large quantity of memory,
+but little or no disk space.
+Because users do not want to be bothered with backing up their disks,
+and because of the difficulty of having a centralized administration
+backing up hundreds of small disks, these local disks are typically
+used only for temporary storage and as swap space.
+Long term storage is managed by the central file server.
+.PP
+Another major technical advance has been in all levels of storage capacity.
+In the last ten years we have experienced a factor of four decrease in the
+cost per byte of disk storage.
+In this same period of time the cost per byte of memory has dropped
+by a factor of a hundred!
+Thus the cost per byte of memory compared to the cost per byte of disk is
+approaching a difference of only about a factor of ten.
+The effect of this change is that the way in which a machine is used
+is beginning to change dramatically.
+As the amount of physical memory on machines increases and the number of
+users per machine decreases, the expected
+mode of operation is changing from that of supporting more active virtual
+memory than physical memory to that of having a surplus of memory that can
+be used for other purposes.
+.PP
+Because many machines will have more physical memory than they do swap
+space (with diskless workstations as an extreme example!),
+it is no longer reasonable to limit the maximum virtual memory
+to the amount of swap space as is done in the current design.
+Consequently, the new design will allow the maximum virtual memory
+to be the sum of physical memory plus swap space.
+For machines with no swap space, the maximum virtual memory will
+be governed by the amount of physical memory.
+.PP
+Another effect of the current technology is that the latency and overhead
+associated with accessing the file system is considerably higher
+since the access must be over the network
+rather than to a locally-attached disk.
+One use of the surplus memory would be to
+maintain a cache of recently used files;
+repeated uses of these files would require at most a verification from
+the file server that the data was up to date.
+Under the current design, file caching is done by the buffer pool,
+while the free memory is maintained in a separate pool.
+The new design should have only a single memory pool so that any
+free memory can be used to cache recently accessed files.
+.PP
+Another portion of the memory will be used to keep track of the contents
+of the blocks on any locally-attached swap space analogously
+to the way that memory pages are handled.
+Thus inactive swap blocks can also be used to cache less-recently-used
+file data.
+Since the swap disk is locally attached, it can be much more quickly
+accessed than a remotely located file system.
+This design allows the user to simply allocate their entire local disk
+to swap space, thus allowing the system to decide what files should
+be cached to maximize its usefulness.
+This design has two major benefits.
+It relieves the user of deciding what files
+should be kept in a small local file system.
+It also insures that all modified files are migrated back to the
+file server in a timely fashion, thus eliminating the need to dump
+the local disk or push the files manually.
+.NH
+User Interface
+.PP
+This section outlines our new virtual memory interface as it is
+currently envisioned.
+The details of the system call interface are contained in Appendix A.
+.SH
+Regions
+.PP
+The virtual memory interface is designed to support both large,
+sparse address spaces as well as small, densely-used address spaces.
+In this context, ``small'' is an address space roughly the
+size of the physical memory on the machine,
+while ``large'' may extend up to the maximum addressability of the machine.
+A process may divide its address space up into a number of regions.
+Initially a process begins with four regions;
+a shared read-only fill-on-demand region with its text,
+a private fill-on-demand region for its initialized data,
+a private zero-fill-on-demand region for its uninitialized data and heap,
+and a private zero-fill-on-demand region for its stack.
+In addition to these regions, a process may allocate new ones.
+The regions may not overlap and the system may impose an alignment
+constraint, but the size of the region should not be limited
+beyond the constraints of the size of the virtual address space.
+.PP
+Each new region may be mapped either as private or shared.
+When it is privately mapped, changes to the contents of the region
+are not reflected to any other processes that map the same region.
+Regions may be mapped read-only or read-write.
+As an example, a shared library would be implemented as two regions;
+a shared read-only region for the text, and a private read-write
+region for the global variables associated with the library.
+.PP
+A region may be allocated with one of several allocation strategies.
+It may map some memory hardware on the machine such as a frame buffer.
+Since the hardware is responsible for storing the data,
+such regions must be exclusive use if they are privately mapped.
+.PP
+A region can map all or part of a file.
+As the pages are first accessed, the region is filled in with the
+appropriate part of the file.
+If the region is mapped read-write and shared, changes to the
+contents of the region are reflected back into the contents of the file.
+If the region is read-write but private,
+changes to the region are copied to a private page that is not
+visible to other processes mapping the file,
+and these modified pages are not reflected back to the file.
+.PP
+The final type of region is ``anonymous memory''.
+Uninitialized data uses such a region, privately mapped;
+it is zero-fill-on-demand and its contents are abandoned
+when the last reference is dropped.
+Unlike a region that is mapped from a file,
+the contents of an anonymous region will never be read from or
+written to a disk unless memory is short and part of the region
+must be paged to a swap area.
+If one of these regions is mapped shared,
+then all processes see the changes in the region.
+This difference has important performance considerations;
+the overhead of reading, flushing, and possibly allocating a file
+is much higher than simply zeroing memory.
+.PP
+If several processes wish to share a region,
+then they must have some way of rendezvousing.
+For a mapped file this is easy;
+the name of the file is used as the rendezvous point.
+However, processes may not need the semantics of mapped files
+nor be willing to pay the overhead associated with them.
+For anonymous memory they must use some other rendezvous point.
+Our current interface allows processes to associate a
+descriptor with a region, which they may then pass to other
+processes that wish to attach to the region.
+Such a descriptor may be bound into the UNIX file system
+name space so that other processes can find it just as
+they would with a mapped file.
+.SH
+Shared memory as high speed interprocess communication
+.PP
+The primary use envisioned for shared memory is to
+provide a high speed interprocess communication (IPC) mechanism
+between cooperating processes.
+Existing IPC mechanisms (\fIi.e.\fP pipes, sockets, or streams)
+require a system call to hand off a set
+of data destined for another process, and another system call
+by the recipient process to receive the data.
+Even if the data can be transferred by remapping the data pages
+to avoid a memory to memory copy, the overhead of doing the system
+calls limits the throughput of all but the largest transfers.
+Shared memory, by contrast, allows processes to share data at any
+level of granularity without system intervention.
+.PP
+However, to maintain all but the simplest of data structures,
+the processes must serialize their modifications to shared
+data structures if they are to avoid corrupting them.
+This serialization is typically done with semaphores.
+Unfortunately, most implementations of semaphores are
+done with system calls.
+Thus processes are once again limited by the need to do two
+system calls per transaction, one to lock the semaphore, the
+second to release it.
+The net effect is that the shared memory model provides little if
+any improvement in interprocess bandwidth.
+.PP
+To achieve a significant improvement in interprocess bandwidth
+requires a large decrease in the number of system calls needed to
+achieve the interaction.
+In profiling applications that use
+serialization locks such as the UNIX kernel,
+one typically finds that most locks are not contested.
+Thus if one can find a way to avoid doing a system call in the case
+in which a lock is not contested,
+one would expect to be able to dramatically reduce the number
+of system calls needed to achieve serialization.
+.PP
+In our design, cooperating processes manage their semaphores
+in their own address space.
+In the typical case, a process executes an atomic test-and-set instruction
+to acquire a lock, finds it free, and thus is able to get it.
+Only in the (rare) case where the lock is already set does the process
+need to do a system call to wait for the lock to clear.
+When a process is finished with a lock,
+it can clear the lock itself.
+Only if the ``WANT'' flag for the lock has been set is
+it necessary for the process to do a system call to cause the other
+process(es) to be awakened.
+.PP
+Another issue that must be considered is portability.
+Some computers require access to special hardware to implement
+atomic interprocessor test-and-set.
+For such machines the setting and clearing of locks would
+all have to be done with system calls;
+applications could still use the same interface without change,
+though they would tend to run slowly.
+.PP
+The other issue of compatibility is with System V's semaphore
+implementation.
+Since the System V interface has been in existence for several years,
+and applications have been built that depend on this interface,
+it is important that this interface also be available.
+Although the interface is based on system calls for both setting and
+clearing locks,
+the same interface can be obtained using our interface without
+system calls in most cases.
+.PP
+This implementation can be achieved as follows.
+System V allows entire sets of semaphores to be set concurrently.
+If any of the locks are unavailable, the process is put to sleep
+until they all become available.
+Under our paradigm, a single additional semaphore is defined
+that serializes access to the set of semaphores being simulated.
+Once obtained in the usual way, the set of semaphores can be
+inspected to see if the desired ones are available.
+If they are available, they are set, the guardian semaphore
+is released and the process proceeds.
+If one or more of the requested set is not available,
+the guardian semaphore is released and the process selects an
+unavailable semaphore for which to wait.
+On being reawakened, the whole selection process must be repeated.
+.PP
+In all the above examples, there appears to be a race condition.
+Between the time that the process finds that a semaphore is locked,
+and the time that it manages to call the system to sleep on the
+semaphore another process may unlock the semaphore and issue a wakeup call.
+Luckily the race can be avoided.
+The insight that is critical is that the process and the kernel agree
+on the physical byte of memory that is being used for the semaphore.
+The system call to put a process to sleep takes a pointer
+to the desired semaphore as its argument so that once inside
+the kernel, the kernel can repeat the test-and-set.
+If the lock has cleared
+(and possibly the wakeup issued) between the time that the process
+did the test-and-set and eventually got into the sleep request system call,
+then the kernel immediately resumes the process rather than putting
+it to sleep.
+Thus the only problem to solve is how the kernel interlocks between testing
+a semaphore and going to sleep;
+this problem has already been solved on existing systems.
+.NH
+References
+.sp
+.IP [Babaoglu79] 20
+Babaoglu, O., and Joy, W.,
+``Data Structures Added in the Berkeley Virtual Memory Extensions
+to the UNIX Operating System'',
+Computer Systems Research Group, Dept of EECS, University of California,
+Berkeley, CA 94720, USA, November 1979.
+.IP [Someren84] 20
+Someren, J. van,
+``Paging in Berkeley UNIX'',
+Laboratorium voor schakeltechniek en techniek v.d.
+informatieverwerkende machines,
+Codenummer 051560-44(1984)01, February 1984.
diff --git a/share/doc/papers/newvm/Makefile b/share/doc/papers/newvm/Makefile
new file mode 100644
index 0000000..8def3b2
--- /dev/null
+++ b/share/doc/papers/newvm/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 1.4 (Berkeley) 6/8/93
+
+DIR= papers/newvm
+SRCS= 0.t 1.t a.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/newvm/a.t b/share/doc/papers/newvm/a.t
new file mode 100644
index 0000000..3b6213a
--- /dev/null
+++ b/share/doc/papers/newvm/a.t
@@ -0,0 +1,239 @@
+.\" Copyright (c) 1986 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)a.t 5.1 (Berkeley) 4/16/91
+.\"
+.sp 2
+.ne 2i
+.NH
+Appendix A \- Virtual Memory Interface
+.SH
+Mapping pages
+.PP
+The system supports sharing of data between processes
+by allowing pages to be mapped into memory. These mapped
+pages may be \fIshared\fP with other processes or \fIprivate\fP
+to the process.
+Protection and sharing options are defined in \fI<sys/mman.h>\fP as:
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* protections are chosen from these bits, or-ed together */
+#define PROT_READ 0x04 /* pages can be read */
+#define PROT_WRITE 0x02 /* pages can be written */
+#define PROT_EXEC 0x01 /* pages can be executed */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* flags contain mapping type, sharing type and options */
+/* mapping type; choose one */
+#define MAP_FILE 0x0001 /* mapped from a file or device */
+#define MAP_ANON 0x0002 /* allocated from memory, swap space */
+#define MAP_TYPE 0x000f /* mask for type field */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* sharing types; choose one */
+#define MAP_SHARED 0x0010 /* share changes */
+#define MAP_PRIVATE 0x0000 /* changes are private */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* other flags */
+#define MAP_FIXED 0x0020 /* map addr must be exactly as requested */
+#define MAP_INHERIT 0x0040 /* region is retained after exec */
+#define MAP_HASSEMAPHORE 0x0080 /* region may contain semaphores */
+.DE
+The cpu-dependent size of a page is returned by the
+\fIgetpagesize\fP system call:
+.DS
+pagesize = getpagesize();
+result int pagesize;
+.DE
+.LP
+The call:
+.DS
+maddr = mmap(addr, len, prot, flags, fd, pos);
+result caddr_t maddr; caddr_t addr; int *len, prot, flags, fd; off_t pos;
+.DE
+causes the pages starting at \fIaddr\fP and continuing
+for at most \fIlen\fP bytes to be mapped from the object represented by
+descriptor \fIfd\fP, starting at byte offset \fIpos\fP.
+The starting address of the region is returned;
+for the convenience of the system,
+it may differ from that supplied
+unless the MAP_FIXED flag is given,
+in which case the exact address will be used or the call will fail.
+The actual amount mapped is returned in \fIlen\fP.
+The \fIaddr\fP, \fIlen\fP, and \fIpos\fP parameters
+must all be multiples of the pagesize.
+A successful \fImmap\fP will delete any previous mapping
+in the allocated address range.
+The parameter \fIprot\fP specifies the accessibility
+of the mapped pages.
+The parameter \fIflags\fP specifies
+the type of object to be mapped,
+mapping options, and
+whether modifications made to
+this mapped copy of the page
+are to be kept \fIprivate\fP, or are to be \fIshared\fP with
+other references.
+Possible types include MAP_FILE,
+mapping a regular file or character-special device memory,
+and MAP_ANON, which maps memory not associated with any specific file.
+The file descriptor used for creating MAP_ANON regions is used only
+for naming, and may be given as \-1 if no name
+is associated with the region.\(dg
+.FS
+\(dg The current design does not allow a process
+to specify the location of swap space.
+In the future we may define an additional mapping type, MAP_SWAP,
+in which the file descriptor argument specifies a file
+or device to which swapping should be done.
+.FE
+The MAP_INHERIT flag allows a region to be inherited after an \fIexec\fP.
+The MAP_HASSEMAPHORE flag allows special handling for
+regions that may contain semaphores.
+.PP
+A facility is provided to synchronize a mapped region with the file
+it maps; the call
+.DS
+msync(addr, len);
+caddr_t addr; int len;
+.DE
+writes any modified pages back to the filesystem and updates
+the file modification time.
+If \fIlen\fP is 0, all modified pages within the region containing \fIaddr\fP
+will be flushed;
+if \fIlen\fP is non-zero, only the pages containing \fIaddr\fP and \fIlen\fP
+succeeding locations will be examined.
+Any required synchronization of memory caches
+will also take place at this time.
+Filesystem operations on a file that is mapped for shared modifications
+are unpredictable except after an \fImsync\fP.
+.PP
+A mapping can be removed by the call
+.DS
+munmap(addr, len);
+caddr_t addr; int len;
+.DE
+This call deletes the mappings for the specified address range,
+and causes further references to addresses within the range
+to generate invalid memory references.
+.SH
+Page protection control
+.PP
+A process can control the protection of pages using the call
+.DS
+mprotect(addr, len, prot);
+caddr_t addr; int len, prot;
+.DE
+This call changes the specified pages to have protection \fIprot\fP\|.
+Not all implementations will guarantee protection on a page basis;
+the granularity of protection changes may be as large as an entire region.
+.SH
+Giving and getting advice
+.PP
+A process that has knowledge of its memory behavior may
+use the \fImadvise\fP call:
+.DS
+madvise(addr, len, behav);
+caddr_t addr; int len, behav;
+.DE
+\fIBehav\fP describes expected behavior, as given
+in \fI<sys/mman.h>\fP:
+.DS
+.ta \w'#define\ \ 'u +\w'MADV_SEQUENTIAL\ \ 'u +\w'00\ \ \ \ 'u
+#define MADV_NORMAL 0 /* no further special treatment */
+#define MADV_RANDOM 1 /* expect random page references */
+#define MADV_SEQUENTIAL 2 /* expect sequential references */
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED 4 /* don't need these pages */
+#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */
+.DE
+Finally, a process may obtain information about whether pages are
+core resident by using the call
+.DS
+mincore(addr, len, vec)
+caddr_t addr; int len; result char *vec;
+.DE
+Here the current core residency of the pages is returned
+in the character array \fIvec\fP, with a value of 1 meaning
+that the page is in-core.
+.SH
+Synchronization primitives
+.PP
+Primitives are provided for synchronization using semaphores in shared memory.
+Semaphores must lie within a MAP_SHARED region with at least modes
+PROT_READ and PROT_WRITE.
+The MAP_HASSEMAPHORE flag must have been specified when the region was created.
+To acquire a lock a process calls:
+.DS
+value = mset(sem, wait)
+result int value; semaphore *sem; int wait;
+.DE
+\fIMset\fP indivisibly tests and sets the semaphore \fIsem\fP.
+If the previous value is zero, the process has acquired the lock
+and \fImset\fP returns true immediately.
+Otherwise, if the \fIwait\fP flag is zero,
+failure is returned.
+If \fIwait\fP is true and the previous value is non-zero,
+\fImset\fP relinquishes the processor until notified that it should retry.
+.LP
+To release a lock a process calls:
+.DS
+mclear(sem)
+semaphore *sem;
+.DE
+\fIMclear\fP indivisibly tests and clears the semaphore \fIsem\fP.
+If the ``WANT'' flag is zero in the previous value,
+\fImclear\fP returns immediately.
+If the ``WANT'' flag is non-zero in the previous value,
+\fImclear\fP arranges for waiting processes to retry before returning.
+.PP
+Two routines provide services analogous to the kernel
+\fIsleep\fP and \fIwakeup\fP functions interpreted in the domain of
+shared memory.
+A process may relinquish the processor by calling \fImsleep\fP
+with a set semaphore:
+.DS
+msleep(sem)
+semaphore *sem;
+.DE
+If the semaphore is still set when it is checked by the kernel,
+the process will be put in a sleeping state
+until some other process issues an \fImwakeup\fP for the same semaphore
+within the region using the call:
+.DS
+mwakeup(sem)
+semaphore *sem;
+.DE
+An \fImwakeup\fP may awaken all sleepers on the semaphore,
+or may awaken only the next sleeper on a queue.
diff --git a/share/doc/papers/newvm/spell.ok b/share/doc/papers/newvm/spell.ok
new file mode 100644
index 0000000..543dc7e
--- /dev/null
+++ b/share/doc/papers/newvm/spell.ok
@@ -0,0 +1,56 @@
+ANON
+Babaoglu
+Babaoglu79
+Behav
+CM
+Codenummer
+DONTNEED
+Dept
+EECS
+Filesystem
+HASSEMAPHORE
+IPC
+Karels
+Laboratorium
+MADV
+McKusick
+Mclear
+Mset
+NOEXTEND
+PROT
+SPACEAVAIL
+Someren
+Someren84
+WILLNEED
+addr
+behav
+caching
+caddr
+es
+fd
+filesystem
+getpagesize
+informatieverwerkende
+len
+maddr
+madvise
+mclear
+mincore
+mman.h
+mmap
+mprotect
+mset
+msleep
+msync
+munmap
+mwakeup
+pagesize
+pos
+prot
+runtime
+schakeltechniek
+sem
+techneik
+v.d
+vec
+voor
diff --git a/share/doc/papers/nqnfs/Makefile b/share/doc/papers/nqnfs/Makefile
new file mode 100644
index 0000000..37530fa
--- /dev/null
+++ b/share/doc/papers/nqnfs/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 8.1 (Berkeley) 4/20/94
+
+DIR= papers/nqnfs
+SRCS= nqnfs.me
+MACROS= -me
+
+paper.ps: ${SRCS}
+ ${PIC} ${SRCS} | ${TBL} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/nqnfs/nqnfs.me b/share/doc/papers/nqnfs/nqnfs.me
new file mode 100644
index 0000000..ce9003e
--- /dev/null
+++ b/share/doc/papers/nqnfs/nqnfs.me
@@ -0,0 +1,2007 @@
+.\" Copyright (c) 1993 The Usenix Association. All rights reserved.
+.\"
+.\" This document is derived from software contributed to Berkeley by
+.\" Rick Macklem at The University of Guelph with the permission of
+.\" the Usenix Association.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)nqnfs.me 8.1 (Berkeley) 4/20/94
+.\"
+.lp
+.nr PS 12
+.ps 12
+Reprinted with permission from the "Proceedings of the Winter 1994 Usenix
+Conference", January 1994, San Francisco, CA, Copyright The Usenix
+Association.
+.nr PS 14
+.ps 14
+.sp
+.ce
+\fBNot Quite NFS, Soft Cache Consistency for NFS\fR
+.nr PS 12
+.ps 12
+.sp
+.ce
+\fIRick Macklem\fR
+.ce
+\fIUniversity of Guelph\fR
+.sp
+.nr PS 12
+.ps 12
+.ce
+\fBAbstract\fR
+.nr PS 10
+.ps 10
+.pp
+There are some constraints inherent in the NFS\(tm\(mo protocol
+that result in performance limitations
+for high performance
+workstation environments.
+This paper discusses an NFS-like protocol named Not Quite NFS (NQNFS),
+designed to address some of these limitations.
+This protocol provides full cache consistency during normal
+operation, while permitting more effective client-side caching in an
+effort to improve performance.
+There are also a variety of minor protocol changes, in order to resolve
+various NFS issues.
+The emphasis is on observed performance of a
+preliminary implementation of the protocol, in order to show
+how well this design works
+and to suggest possible areas for further improvement.
+.sh 1 "Introduction"
+.pp
+It has been observed that
+overall workstation performance has not been scaling with
+processor speed and that file system I/O is a limiting factor [Ousterhout90].
+Ousterhout
+notes
+that a principal challenge for operating system developers is the
+decoupling of system calls from their underlying I/O operations, in order
+to improve average system call response times.
+For distributed file systems, every synchronous Remote Procedure Call (RPC)
+takes a minimum of a few milliseconds and, as such, is analogous to an
+underlying I/O operation.
+This suggests that client caching with a very good
+hit ratio for read type operations, along with asynchronous writing, is required in order to avoid delays waiting for RPC replies.
+However, the NFS protocol requires that the server be stateless\**
+.(f
+\**The server must not require any state that may be lost due to a crash, to
+function correctly.
+.)f
+and does not provide any explicit mechanism for client cache
+consistency, putting
+constraints on how the client may cache data.
+This paper describes an NFS-like protocol that includes a cache consistency
+component designed to enhance client caching performance. It does provide
+full consistency under normal operation, but without requiring that hard
+state information be maintained on the server.
+Design tradeoffs were made towards simplicity and
+high performance over cache consistency under abnormal conditions.
+The protocol design uses a variation of Leases [Gray89]
+to provide state on the server that does not need to be recovered after a
+crash.
+.pp
+The protocol also includes changes designed to address other limitations
+of NFS in a modern workstation environment.
+The use of TCP transport is optionally available to avoid
+the pitfalls of Sun RPC over UDP transport when running across an internetwork [Nowicki89].
+Kerberos [Steiner88] support is available
+to do proper user authentication, in order to provide improved security and
+arbitrary client to server user ID mappings.
+There are also a variety of other changes to accommodate large file systems,
+such as 64bit file sizes and offsets, as well as lifting the 8Kbyte I/O size
+limit.
+The remainder of this paper gives an overview of the protocol, highlighting
+performance related components, followed by an evaluation of resultant performance
+for the 4.4BSD implementation.
+.sh 1 "Distributed File Systems and Caching"
+.pp
+Clients using distributed file systems cache recently-used data in order
+to reduce the number of synchronous server operations, and therefore improve
+average response times for system calls.
+Unfortunately, maintaining consistency between these caches is a problem
+whenever write sharing occurs; that is, when a process on a client writes
+to a file and one or more processes on other client(s) read the file.
+If the writer closes the file before any reader(s) open the file for reading,
+this is called sequential write sharing. Both the Andrew ITC file system
+[Howard88] and NFS [Sandberg85] maintain consistency for sequential write
+sharing by requiring the writer to push all the writes through to the
+server on close and having readers check to see if the file has been
+modified upon open. If the file has been modified, the client throws away
+all cached data for that file, as it is now stale.
+NFS implementations typically detect file modification by checking a cached
+copy of the file's modification time; since this cached value is often
+several seconds out of date and only has a resolution of one second, an NFS
+client often uses stale cached data for some time after the file has
+been updated on the server.
+.pp
+A more difficult case is concurrent write sharing, where write operations are intermixed
+with read operations.
+Consistency for this case, often referred to as "full cache consistency,"
+requires that a reader always receives the most recently written data.
+Neither NFS nor the Andrew ITC file system maintain consistency for this
+case.
+The simplest mechanism for maintaining full cache consistency is the one
+used by Sprite [Nelson88], which disables all client caching of the
+file whenever concurrent write sharing might occur.
+There are other mechanisms described in the literature [Kent87a,
+Burrows88], but they appeared to be too elaborate for incorporation
+into NQNFS (for example, Kent's requires specialized hardware).
+NQNFS differs from Sprite in the way it
+detects write sharing. The Sprite server maintains a list of files currently open
+by the various clients and detects write sharing when a file open request
+for writing is received and the file is already open for reading
+(or vice versa).
+This list of open files is hard state information that must be recovered
+after a server crash, which is a significant problem in its own
+right [Mogul93, Welch90].
+.pp
+The approach used by NQNFS is a variant of the Leases mechanism [Gray89].
+In this model, the server issues to a client a promise, referred to as a
+"lease," that the client may cache a specific object without fear of
+conflict.
+A lease has a limited duration and must be renewed by the client if it
+wishes to continue to cache the object.
+In NQNFS, clients hold short-term (up to one minute) leases on files
+for reading or writing.
+The leases are analogous to entries in the open file list, except that
+they expire after the lease term unless renewed by the client.
+As such, one minute after issuing the last lease there are no current
+leases and therefore no lease records to be recovered after a crash, hence
+the term "soft server state."
+.pp
+A related design consideration is the way client writing is done.
+Synchronous writing requires that all writes be pushed through to the server
+during the write system call.
+This is the simplest variant, from a consistency point of view, since the
+server always has the most recently written data. It also permits any write
+errors, such as "file system out of space," to be propagated back to the
+client's process via the write system call return.
+Unfortunately this approach limits the client write rate, based on server write
+performance and client/server RPC round trip time (RTT).
+.pp
+An alternative to this is delayed writing, where the write system call returns
+as soon as the data is cached on the client and the data is written to the
+server sometime later.
+This permits client writing to occur at the rate of local storage access
+up to the size of the local cache.
+Also, for cases where file truncation/deletion occurs shortly after writing,
+the write to the server may be avoided since the data has already been
+deleted, reducing server write load.
+There are some obvious drawbacks to this approach.
+For any Sprite-like system to maintain
+full consistency, the server must "callback" to the client to cause the
+delayed writes to be written back to the server when write sharing is about to
+occur.
+There are also problems with the propagation of errors
+back to the client process that issued the write system call.
+The reason for this is that
+the system call has already returned without reporting an error and the
+process may also have already terminated.
+As well, there is a risk of the loss of recently written data if the client
+crashes before the data is written back to the server.
+.pp
+A compromise between these two alternatives is asynchronous writing, where
+the write to the server is initiated during the write system call but the write system
+call returns before the write completes.
+This approach minimizes the risk of data loss due to a client crash, but negates
+the possibility of reducing server write load by throwing writes away when
+a file is truncated or deleted.
+.pp
+NFS implementations usually do a mix of asynchronous and delayed writing
+but push all writes to the server upon close, in order to maintain open/close
+consistency.
+Pushing the delayed writes on close
+negates much of the performance advantage of delayed writing, since the
+delays that were avoided in the write system calls are observed in the close
+system call.
+Akin to Sprite, the NQNFS protocol does delayed writing in an effort to achieve
+good client performance and uses a callback mechanism to maintain full cache
+consistency.
+.sh 1 "Related Work"
+.pp
+There has been a great deal of effort put into improving the performance and
+consistency of the NFS protocol. This work can be put in two categories.
+The first category consists of implementation enhancements for the NFS protocol and
+the second involves modifications to the protocol.
+.pp
+The work done on implementation enhancements has attacked two problem areas:
+NFS server write performance and RPC transport problems.
+Server write performance is a major problem for NFS, in part due to the
+requirement to push all writes to the server upon close and in part due
+to the fact that, for writes, all data and meta-data must be committed to
+non-volatile storage before the server replies to the write RPC.
+The Prestoserve\(tm\(dg
+[Moran90]
+system uses non-volatile RAM as a buffer for recently written data on the server,
+so that the write RPC replies can be returned to the client before the data is written to the
+disk surface.
+Write gathering [Juszczak94] is a software technique used on the server where a write
+RPC request is delayed for a short time in the hope that another contiguous
+write request will arrive, so that they can be merged into one write operation.
+Since the replies to all of the merged writes are not returned to the client until the write
+operation is completed, this delay does not violate the protocol.
+When write operations are merged, the number of disk writes can be reduced,
+improving server write performance.
+Although either of the above reduces write RPC response time for the server,
+it cannot be reduced to zero, and so, any client side caching mechanism
+that reduces write RPC load or client dependence on server RPC response time
+should still improve overall performance.
+Good client side caching should be complementary to these server techniques,
+although client performance improvements as a result of caching may be less
+dramatic when these techniques are used.
+.pp
+In NFS, each Sun RPC request is packaged in a UDP datagram for transmission
+to the server. A timer is started, and if a timeout occurs before the corresponding
+RPC reply is received, the RPC request is retransmitted.
+There are two problems with this model.
+First, when a retransmit timeout occurs, the RPC may be redone, instead of
+simply retransmitting the RPC request message to the server. A recent-request
+cache can be used on the server to minimize the negative impact of redoing
+RPCs [Juszczak89].
+The second problem is that a large UDP datagram, such as a read reply or
+write request, must be fragmented by IP and if any one IP fragment is lost in
+transit, the entire UDP datagram is lost [Kent87]. Since entire requests and replies
+are packaged in a single UDP datagram, this puts an upper bound on the read/write
+data size (8 kbytes).
+.pp
+Adjusting the retransmit timeout (RTO) interval dynamically and applying a
+congestion window on outstanding requests has been shown to be of some help
+[Nowicki89] with the retransmission problem.
+An alternative to this is to use TCP transport to deliver the RPC messages
+reliably [Macklem90] and one of the performance results in this paper
+shows the effects of this further.
+.pp
+Srinivasan and Mogul [Srinivasan89] enhanced the NFS protocol to use the Sprite cache
+consistency algorithm in an effort to improve performance and to provide
+full client cache consistency.
+This experimental implementation demonstrated significantly better
+performance than NFS, but suffered from a lack of crash recovery support.
+The NQNFS protocol design borrowed heavily from this work, but differed
+from the Sprite algorithm by using Leases instead of file open state
+to detect write sharing.
+The decision to use Leases was made primarily to avoid the crash recovery
+problem.
+More recent work by the Sprite group [Baker91] and Mogul [Mogul93] has
+addressed the crash recovery problem, making this design tradeoff more
+questionable now.
+.pp
+Sun has recently updated the NFS protocol to Version 3 [SUN93], using some
+changes similar to NQNFS to address various issues. The Version 3 protocol
+uses 64bit file sizes and offsets, provides a Readdir_and_Lookup RPC and
+an access RPC.
+It also provides cache hints, to permit a client to be able to determine
+whether a file modification is the result of that client's write or some
+other client's write.
+It would be possible to add either Spritely NFS or NQNFS support for cache
+consistency to the NFS Version 3 protocol.
+.sh 1 "NQNFS Consistency Protocol and Recovery"
+.pp
+The NQNFS cache consistency protocol uses a somewhat Sprite-like [Nelson88]
+mechanism, but is based on Leases [Gray89] instead of hard server state information
+about open files.
+The basic principle is that the server disables client caching of files whenever
+concurrent write sharing could occur, by performing a server-to-client
+callback,
+forcing the client to flush its caches and to do all subsequent I/O on the file with
+synchronous RPCs.
+A Sprite server maintains a record of the open state of files for
+all clients and uses this to determine when concurrent write sharing might
+occur.
+This \fIopen state\fR information might also be referred to as an infinite-term
+lease for the file, with explicit lease cancellation.
+NQNFS, on the other hand, uses a short-term lease that expires due to timeout
+after a maximum of one minute, unless explicitly renewed by the client.
+The fundamental difference is that an NQNFS client must keep renewing
+a lease to use cached data whereas a Sprite client assumes the data is valid until canceled
+by the server
+or the file is closed.
+Using leases permits the server to remain "stateless," since the soft
+state information, which consists of the set of current leases, is
+moot after one minute, when all the leases expire.
+.pp
+Whenever a client wishes to access a file's data it must hold one of
+three types of lease: read-caching, write-caching or non-caching.
+The latter type requires that all file operations be done synchronously with
+the server via the appropriate RPCs.
+.pp
+A read-caching lease allows for client data caching but no modifications
+may be done.
+It may, however, be shared between multiple clients. Diagram 1 shows a typical
+read-caching scenario. The vertical solid black lines depict the lease records.
+Note that the time lines are nowhere near to scale, since a client/server
+interaction will normally take less than one hundred milliseconds, whereas the
+normal lease duration is thirty seconds.
+Every lease includes a \fImodrev\fR value, which changes upon every modification
+of the file. It may be used to check to see if data cached on the client is
+still current.
+.pp
+A write-caching lease permits delayed write caching,
+but requires that all data be pushed to the server when the lease expires
+or is terminated by an eviction callback.
+When a write-caching lease has almost expired, the client will attempt to
+extend the lease if the file is still open, but is required to push the delayed writes to the server
+if renewal fails (as depicted by diagram 2).
+The writes may not arrive at the server until after the write lease has
+expired on the client, but this does not result in a consistency problem,
+so long as the write lease is still valid on the server.
+Note that, in diagram 2, the lease record on the server remains current after
+the expiry time, due to the conditions mentioned in section 5.
+If a write RPC is done on the server after the write lease has expired on
+the server, this could be considered an error since consistency could be
+lost, but it is not handled as such by NQNFS.
+.pp
+Diagram 3 depicts how read and write leases are replaced by a non-caching
+lease when there is the potential for write sharing.
+.(z
+.sp
+.PS
+.ps
+.ps 50
+line from 0.738,5.388 to 1.238,5.388
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 1.488,10.075 to 1.488,5.450
+line dashed from 2.987,10.075 to 2.987,5.450
+line dashed from 4.487,10.075 to 4.487,5.450
+.ps
+.ps 50
+line from 4.487,7.013 to 4.487,5.950
+line from 2.987,7.700 to 2.987,5.950 to 2.987,6.075
+line from 1.488,7.513 to 1.488,5.950
+line from 2.987,9.700 to 2.987,8.325
+line from 1.488,9.450 to 1.488,8.325
+.ps
+.ps 10
+line from 2.987,6.450 to 4.487,6.200
+line from 4.385,6.192 to 4.487,6.200 to 4.393,6.241
+line from 4.487,6.888 to 2.987,6.575
+line from 3.080,6.620 to 2.987,6.575 to 3.090,6.571
+line from 2.987,7.263 to 4.487,7.013
+line from 4.385,7.004 to 4.487,7.013 to 4.393,7.054
+line from 4.487,7.638 to 2.987,7.388
+line from 3.082,7.429 to 2.987,7.388 to 3.090,7.379
+line from 2.987,6.888 to 1.488,6.575
+line from 1.580,6.620 to 1.488,6.575 to 1.590,6.571
+line from 1.488,7.200 to 2.987,6.950
+line from 2.885,6.942 to 2.987,6.950 to 2.893,6.991
+line from 2.987,7.700 to 1.488,7.513
+line from 1.584,7.550 to 1.488,7.513 to 1.590,7.500
+line from 1.488,8.012 to 2.987,7.763
+line from 2.885,7.754 to 2.987,7.763 to 2.893,7.804
+line from 2.987,9.012 to 1.488,8.825
+line from 1.584,8.862 to 1.488,8.825 to 1.590,8.813
+line from 1.488,9.325 to 2.987,9.137
+line from 2.885,9.125 to 2.987,9.137 to 2.891,9.175
+line from 2.987,9.637 to 1.488,9.450
+line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
+line from 1.488,9.887 to 2.987,9.700
+line from 2.885,9.688 to 2.987,9.700 to 2.891,9.737
+.ps
+.ps 12
+.ft
+.ft R
+"Lease valid on machine" at 1.363,5.296 ljust
+"with same modrev" at 1.675,7.421 ljust
+"miss)" at 2.612,9.233 ljust
+"(cache" at 2.300,9.358 ljust
+.ps
+.ps 14
+"Diagram #1: Read Caching Leases" at 0.738,5.114 ljust
+"Client B" at 4.112,10.176 ljust
+"Server" at 2.612,10.176 ljust
+"Client A" at 0.925,10.176 ljust
+.ps
+.ps 12
+"from cache" at 4.675,6.546 ljust
+"Read syscalls" at 4.675,6.796 ljust
+"Reply" at 3.737,6.108 ljust
+"(cache miss)" at 3.675,6.421 ljust
+"Read req" at 3.737,6.608 ljust
+"to lease" at 3.112,6.796 ljust
+"Client B added" at 3.112,6.983 ljust
+"Reply" at 3.237,7.296 ljust
+"Read + lease req" at 3.175,7.671 ljust
+"Read syscall" at 4.675,7.608 ljust
+"Reply" at 1.675,6.796 ljust
+"miss)" at 2.487,7.108 ljust
+"Read req (cache" at 1.675,7.233 ljust
+"from cache" at 0.425,6.296 ljust
+"Read syscalls" at 0.425,6.546 ljust
+"cache" at 0.425,6.858 ljust
+"so can still" at 0.425,7.108 ljust
+"Modrev same" at 0.425,7.358 ljust
+"Reply" at 1.675,7.671 ljust
+"Get lease req" at 1.675,8.108 ljust
+"Read syscall" at 0.425,7.983 ljust
+"Lease times out" at 0.425,8.296 ljust
+"from cache" at 0.425,9.046 ljust
+"Read syscalls" at 0.425,9.296 ljust
+"for Client A" at 3.112,9.296 ljust
+"Read caching lease" at 3.112,9.483 ljust
+"Reply" at 1.675,8.983 ljust
+"Read req" at 1.675,9.358 ljust
+"Reply" at 1.675,9.608 ljust
+"Read + lease req" at 1.675,9.921 ljust
+"Read syscall" at 0.425,9.921 ljust
+.ps
+.ft
+.PE
+.sp
+.)z
+.(z
+.sp
+.PS
+.ps
+.ps 50
+line from 1.175,5.700 to 1.300,5.700
+line from 0.738,5.700 to 1.175,5.700
+line from 2.987,6.638 to 2.987,6.075
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 2.987,6.575 to 2.987,5.950
+line dashed from 1.488,6.575 to 1.488,5.888
+.ps
+.ps 50
+line from 2.987,9.762 to 2.987,6.638
+line from 1.488,9.450 to 1.488,7.700
+.ps
+.ps 10
+line from 2.987,6.763 to 1.488,6.575
+line from 1.584,6.612 to 1.488,6.575 to 1.590,6.563
+line from 1.488,7.013 to 2.987,6.825
+line from 2.885,6.813 to 2.987,6.825 to 2.891,6.862
+line from 2.987,7.325 to 1.488,7.075
+line from 1.582,7.116 to 1.488,7.075 to 1.590,7.067
+line from 1.488,7.700 to 2.987,7.388
+line from 2.885,7.383 to 2.987,7.388 to 2.895,7.432
+line from 2.987,8.575 to 1.488,8.325
+line from 1.582,8.366 to 1.488,8.325 to 1.590,8.317
+line from 1.488,8.887 to 2.987,8.637
+line from 2.885,8.629 to 2.987,8.637 to 2.893,8.679
+line from 2.987,9.637 to 1.488,9.450
+line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
+line from 1.488,9.887 to 2.987,9.762
+line from 2.886,9.746 to 2.987,9.762 to 2.890,9.796
+line dashed from 2.987,10.012 to 2.987,6.513
+line dashed from 1.488,10.012 to 1.488,6.513
+.ps
+.ps 12
+.ft
+.ft R
+"write" at 4.237,5.921 ljust
+"Lease valid on machine" at 1.425,5.733 ljust
+.ps
+.ps 14
+"Diagram #2: Write Caching Lease" at 0.738,5.551 ljust
+"Server" at 2.675,10.114 ljust
+"Client A" at 1.113,10.114 ljust
+.ps
+.ps 12
+"seconds after last" at 3.112,5.921 ljust
+"Expires write_slack" at 3.112,6.108 ljust
+"due to write activity" at 3.112,6.608 ljust
+"Expiry delayed" at 3.112,6.796 ljust
+"Lease times out" at 3.112,7.233 ljust
+"Lease renewed" at 3.175,8.546 ljust
+"Lease for client A" at 3.175,9.358 ljust
+"Write caching" at 3.175,9.608 ljust
+"Reply" at 1.675,6.733 ljust
+"Write req" at 1.988,7.046 ljust
+"Reply" at 1.675,7.233 ljust
+"Write req" at 1.675,7.796 ljust
+"Lease expires" at 0.487,7.733 ljust
+"Close syscall" at 0.487,8.108 ljust
+"lease granted" at 1.675,8.546 ljust
+"Get write lease" at 1.675,8.921 ljust
+"before expiry" at 0.487,8.608 ljust
+"Lease renewal" at 0.487,8.796 ljust
+"syscalls" at 0.487,9.046 ljust
+"Delayed write" at 0.487,9.233 ljust
+"lease granted" at 1.675,9.608 ljust
+"Get write lease req" at 1.675,9.921 ljust
+"Write syscall" at 0.487,9.858 ljust
+.ps
+.ft
+.PE
+.sp
+.)z
+.(z
+.sp
+.PS
+.ps
+.ps 50
+line from 0.613,2.638 to 1.238,2.638
+line from 1.488,4.075 to 1.488,3.638
+line from 2.987,4.013 to 2.987,3.575
+line from 4.487,4.013 to 4.487,3.575
+.ps
+.ps 10
+line from 2.987,3.888 to 4.487,3.700
+line from 4.385,3.688 to 4.487,3.700 to 4.391,3.737
+line from 4.487,4.138 to 2.987,3.950
+line from 3.084,3.987 to 2.987,3.950 to 3.090,3.938
+line from 2.987,4.763 to 4.487,4.450
+line from 4.385,4.446 to 4.487,4.450 to 4.395,4.495
+.ps
+.ps 50
+line from 4.487,4.438 to 4.487,4.013
+.ps
+.ps 10
+line from 4.487,5.138 to 2.987,4.888
+line from 3.082,4.929 to 2.987,4.888 to 3.090,4.879
+.ps
+.ps 50
+line from 4.487,6.513 to 4.487,5.513
+line from 4.487,6.513 to 4.487,6.513 to 4.487,5.513
+line from 2.987,5.450 to 2.987,5.200
+line from 1.488,5.075 to 1.488,4.075
+line from 2.987,5.263 to 2.987,4.013
+line from 2.987,7.700 to 2.987,5.325
+line from 4.487,7.575 to 4.487,6.513
+line from 1.488,8.512 to 1.488,8.075
+line from 2.987,8.637 to 2.987,8.075
+line from 2.987,9.637 to 2.987,8.825
+line from 1.488,9.450 to 1.488,8.950
+.ps
+.ps 10
+line from 2.987,4.450 to 1.488,4.263
+line from 1.584,4.300 to 1.488,4.263 to 1.590,4.250
+line from 1.488,4.888 to 2.987,4.575
+line from 2.885,4.571 to 2.987,4.575 to 2.895,4.620
+line from 2.987,5.263 to 1.488,5.075
+line from 1.584,5.112 to 1.488,5.075 to 1.590,5.063
+line from 4.487,5.513 to 2.987,5.325
+line from 3.084,5.362 to 2.987,5.325 to 3.090,5.313
+line from 2.987,5.700 to 4.487,5.575
+line from 4.386,5.558 to 4.487,5.575 to 4.390,5.608
+line from 4.487,6.013 to 2.987,5.825
+line from 3.084,5.862 to 2.987,5.825 to 3.090,5.813
+line from 2.987,6.200 to 4.487,6.075
+line from 4.386,6.058 to 4.487,6.075 to 4.390,6.108
+line from 4.487,6.450 to 2.987,6.263
+line from 3.084,6.300 to 2.987,6.263 to 3.090,6.250
+line from 2.987,6.700 to 4.487,6.513
+line from 4.385,6.500 to 4.487,6.513 to 4.391,6.550
+line from 1.488,6.950 to 2.987,6.763
+line from 2.885,6.750 to 2.987,6.763 to 2.891,6.800
+line from 2.987,7.700 to 4.487,7.575
+line from 4.386,7.558 to 4.487,7.575 to 4.390,7.608
+line from 4.487,7.950 to 2.987,7.763
+line from 3.084,7.800 to 2.987,7.763 to 3.090,7.750
+line from 2.987,8.637 to 1.488,8.512
+line from 1.585,8.546 to 1.488,8.512 to 1.589,8.496
+line from 1.488,8.887 to 2.987,8.700
+line from 2.885,8.688 to 2.987,8.700 to 2.891,8.737
+line from 2.987,9.637 to 1.488,9.450
+line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
+line from 1.488,9.950 to 2.987,9.762
+line from 2.885,9.750 to 2.987,9.762 to 2.891,9.800
+dashwid = 0.050i
+line dashed from 4.487,10.137 to 4.487,2.825
+line dashed from 2.987,10.137 to 2.987,2.825
+line dashed from 1.488,10.137 to 1.488,2.825
+.ps
+.ps 12
+.ft
+.ft R
+"(not cached)" at 4.612,3.858 ljust
+.ps
+.ps 14
+"Diagram #3: Write sharing case" at 0.613,2.239 ljust
+.ps
+.ps 12
+"Write syscall" at 4.675,7.546 ljust
+"Read syscall" at 0.550,9.921 ljust
+.ps
+.ps 14
+"Lease valid on machine" at 1.363,2.551 ljust
+.ps
+.ps 12
+"(can still cache)" at 1.675,8.171 ljust
+"Reply" at 3.800,3.858 ljust
+"Write" at 3.175,4.046 ljust
+"writes" at 4.612,4.046 ljust
+"synchronous" at 4.612,4.233 ljust
+"write syscall" at 4.675,5.108 ljust
+"non-caching lease" at 3.175,4.296 ljust
+"Reply " at 3.175,4.483 ljust
+"req" at 3.175,4.983 ljust
+"Get write lease" at 3.175,5.108 ljust
+"Vacated msg" at 3.175,5.483 ljust
+"to the server" at 4.675,5.858 ljust
+"being flushed to" at 4.675,6.046 ljust
+"Delayed writes" at 4.675,6.233 ljust
+.ps
+.ps 16
+"Server" at 2.675,10.182 ljust
+"Client B" at 3.925,10.182 ljust
+"Client A" at 0.863,10.182 ljust
+.ps
+.ps 12
+"(not cached)" at 0.550,4.733 ljust
+"Read data" at 0.550,4.921 ljust
+"Reply data" at 1.675,4.421 ljust
+"Read request" at 1.675,4.921 ljust
+"lease" at 1.675,5.233 ljust
+"Reply non-caching" at 1.675,5.421 ljust
+"Reply" at 3.737,5.733 ljust
+"Write" at 3.175,5.983 ljust
+"Reply" at 3.737,6.171 ljust
+"Write" at 3.175,6.421 ljust
+"Eviction Notice" at 3.175,6.796 ljust
+"Get read lease" at 1.675,7.046 ljust
+"Read syscall" at 0.550,6.983 ljust
+"being cached" at 4.675,7.171 ljust
+"Delayed writes" at 4.675,7.358 ljust
+"lease" at 3.175,7.233 ljust
+"Reply write caching" at 3.175,7.421 ljust
+"Get write lease" at 3.175,7.983 ljust
+"Write syscall" at 4.675,7.983 ljust
+"with same modrev" at 1.675,8.358 ljust
+"Lease" at 0.550,8.171 ljust
+"Renewed" at 0.550,8.358 ljust
+"Reply" at 1.675,8.608 ljust
+"Get Lease Request" at 1.675,8.983 ljust
+"Read syscall" at 0.550,8.733 ljust
+"from cache" at 0.550,9.108 ljust
+"Read syscall" at 0.550,9.296 ljust
+"Reply " at 1.675,9.671 ljust
+"plus lease" at 2.050,9.983 ljust
+"Read Request" at 1.675,10.108 ljust
+.ps
+.ft
+.PE
+.sp
+.)z
+A write-caching lease is not used in the Stanford V Distributed System [Gray89],
+since synchronous writing is always used. A side effect of this change
+is that the five to ten second lease duration recommended by Gray was found
+to be insufficient to achieve good performance for the write-caching lease.
+Experimentation showed that thirty seconds was about optimal for cases where
+the client and server are connected to the same local area network, so
+thirty seconds is the default lease duration for NQNFS.
+A maximum of twice that value is permitted, since Gray showed that for some
+network topologies, a larger lease duration functions better.
+Although there is an explicit get_lease RPC defined for the protocol,
+most lease requests are piggybacked onto the other RPCs to minimize the
+additional overhead introduced by leasing.
+.sh 2 "Rationale"
+.pp
+Leasing was chosen over hard server state information for the following
+reasons:
+.ip 1.
+The server must maintain state information about all current
+client leases.
+Since at most one lease is allocated for each RPC and the leases expire
+after their lease term,
+the upper bound on the number of current leases is the product of the
+lease term and the server RPC rate.
+In practice, it has been observed that less than 10% of RPCs request new leases
+and since most leases have a term of thirty seconds, the following rule of
+thumb should estimate the number of server lease records:
+.sp
+.nf
+ Number of Server Lease Records \(eq 0.1 * 30 * RPC rate
+.fi
+.sp
+Since each lease record occupies 64 bytes of server memory, storing the lease
+records should not be a serious problem.
+If a server has exhausted lease storage, it can simply wait a few seconds
+for a lease to expire and free up a record.
+On the other hand, a Sprite-like server must store records for all files
+currently open by all clients, which can require significant storage for
+a large, heavily loaded server.
+In [Mogul93], it is proposed that a mechanism vaguely similar to paging could be
+used to deal with this for Spritely NFS, but this
+appears to introduce a fair amount of complexity and may limit the
+usefulness of open records for storing other state information, such
+as file locks.
+.ip 2.
+After a server crashes, the only state it would need to recover is the lease
+records for the currently outstanding leases; this implies that if it simply waits
+until all leases have expired, there is no state to recover.
+The server must wait for the maximum lease duration of one minute, and it must serve
+all outstanding write requests resulting from terminated write-caching
+leases before issuing new leases. The one minute delay can be overlapped with
+file system consistency checking (e.g. fsck).
+Because no state must be recovered, a lease-based server, like an NFS server,
+avoids the problem of state recovery after a crash.
+.sp
+There can, however, be problems during crash recovery
+because of a potentially large number of write backs due to terminated
+write-caching leases.
+One of these problems is a "recovery storm" [Baker91], which could occur when
+the server is overloaded by the number of write RPC requests.
+The NQNFS protocol deals with this by replying
+with a return status code called
+try_again_later to all
+RPC requests (except write) until the write requests subside.
+At this time, there has not been sufficient testing of server crash
+recovery while under heavy server load to determine if the try_again_later
+reply is a sufficient solution to the problem.
+The other problem is that consistency will be lost if other RPCs are performed
+before all of the write backs for terminated write-caching leases have completed.
+This is handled by only performing write RPCs until
+no write RPC requests arrive
+for write_slack seconds, where write_slack is set to several times
+the client timeout retransmit interval,
+at which time it is assumed all clients have had an opportunity to send their writes
+to the server.
+.ip 3.
+Another advantage of leasing is that, since leases are required at times when other I/O operations occur,
+lease requests can almost always be piggybacked on other RPCs, avoiding some of the
+overhead associated with the explicit open and close RPCs required by a Sprite-like system.
+Compared with Sprite cache consistency,
+this can result in a significantly lower RPC load (see table #1).
+.sh 1 "Limitations of the NQNFS Protocol"
+.pp
+There is a serious risk when leasing is used for delayed write
+caching.
+If the server is simply too busy to service a lease renewal before a write-caching
+lease terminates, the client will not be able to push the write
+data to the server before the lease has terminated, resulting in
+inconsistency.
+Note that the danger of inconsistency occurs when the server assumes that
+a write-caching lease has terminated before the client has
+had the opportunity to write the data back to the server.
+In an effort to avoid this problem, the NQNFS server does not assume that
+a write-caching lease has terminated until three conditions are met:
+.sp
+.(l
+1 - clock time > (expiry time + clock skew)
+2 - there is at least one server daemon (nfsd) waiting for an RPC request
+3 - no write RPCs received for leased file within write_slack after the corrected expiry time
+.)l
+.lp
+The first condition ensures that the lease has expired on the client.
+The clock_skew, by default three seconds, must be
+set to a value larger than the maximum time-of-day clock error that is likely to occur
+during the maximum lease duration.
+The second condition attempts to ensure that the client
+is not waiting for replies to any writes that are still queued for service by
+an nfsd. The third condition tries to guarantee that the client has
+transmitted all write requests to the server, since write_slack is set to
+several times the client's timeout retransmit interval.
+.pp
+There are also certain file system semantics that are problematic for both NFS and NQNFS,
+due to the
+lack of state information maintained by the
+server. If a file is unlinked on one client while open on another it will
+be removed from the file server, resulting in failed file accesses on the
+client that has the file open.
+If the file system on the server is out of space or the client user's disk
+quota has been exceeded, a delayed write can fail long after the write system
+call was successfully completed.
+With NFS this error will be detected by the close system call, since
+the delayed writes are pushed upon close. With NQNFS, however, the delayed write
+RPC may not occur until after the close system call, possibly even after the process
+has exited.
+Therefore,
+if a process must check for write errors,
+a system call such as \fIfsync\fR must be used.
+.pp
+Another problem occurs when a process on one client is
+running an executable file
+and a process on another client starts to write to the file. The read lease on
+the first client is terminated by the server, but the client has no recourse but
+to terminate the process, since the process is already in progress on the old
+executable.
+.pp
+The NQNFS protocol does not support file locking, since a file lock would have
+to involve hard state information, which must be recovered after a crash.
+.sh 1 "Other NQNFS Protocol Features"
+.pp
+NQNFS also includes a variety of minor modifications to the NFS protocol, in an
+attempt to address various limitations.
+The protocol uses 64-bit file sizes and offsets in order to handle large files.
+TCP transport may be used as an alternative to UDP
+for cases where UDP does not perform well.
+Transport mechanisms
+such as TCP also permit the use of much larger read/write data sizes,
+which might improve performance in certain environments.
+.pp
+The NQNFS protocol replaces the Readdir RPC with a Readdir_and_Lookup
+RPC that returns the file handle and attributes for each file in the
+directory as well as name and file id number.
+This additional information may then be loaded into the lookup and file-attribute
+caches on the client.
+Thus, for cases such as "ls -l", the \fIstat\fR system calls can be performed
+locally without doing any lookup or getattr RPCs.
+Another additional RPC is the Access RPC that checks for file
+accessibility against the server. This is necessary since in some cases the
+client user ID is mapped to a different user on the server and doing the
+access check locally on the client using file attributes and client credentials is
+not correct.
+One case where this becomes necessary is when the NQNFS mount point is using
+Kerberos authentication, where the Kerberos authentication ticket is translated
+to credentials on the server that are mapped to the client side user id.
+For further details on the protocol, see [Macklem93].
+.sh 1 "Performance"
+.pp
+In order to evaluate the effectiveness of the NQNFS protocol,
+a benchmark was used that was
+designed to typify
+real work on the client workstation.
+Benchmarks, such as Laddis [Wittle93], that perform server load characterization
+are not appropriate for this work, since it is primarily client caching
+efficiency that needs to be evaluated.
+Since these tests are measuring overall client system performance and
+not just the performance of the file system,
+each sequence of runs was performed on identical hardware and operating system in order to factor out the system
+components affecting performance other than the file system protocol.
+.pp
+The equipment used for all the benchmarks consisted of members of the DECstation\(tm\(dg
+family of workstations using the MIPS\(tm\(sc RISC architecture.
+The operating system running on these systems was a pre-release version of
+4.4BSD Unix\(tm\(dd.
+For all benchmarks, the file server was a DECstation 2100 (10 MIPS) with 8Mbytes of
+memory and a local RZ23 SCSI disk (27msec average access time).
+The clients range in speed from DECstation 2100s
+to a DECstation 5000/25, and always run with six block I/O daemons
+and a 4Mbyte buffer cache, except for the test runs where the
+buffer cache size was the independent variable.
+In all cases /tmp is mounted on the local SCSI disk\**, all machines were
+attached to the same uncongested Ethernet, and ran in single user mode during the benchmarks.
+.(f
+\**Testing using the 4.4BSD MFS [McKusick90] resulted in slightly degraded performance,
+probably since the machines only had 16Mbytes of memory, and so paging
+increased.
+.)f
+Unless noted otherwise, test runs used UDP RPC transport
+and the results given are the average values of four runs.
+.pp
+The benchmark used is the Modified Andrew Benchmark (MAB)
+[Ousterhout90],
+which is a slightly modified version of the benchmark used to characterize
+performance of the Andrew ITC file system [Howard88].
+The MAB was set up with the executable binaries in the remote mounted file
+system and the final load step was commented out, due to a linkage problem
+during testing under 4.4BSD.
+Therefore, these results are not directly comparable to other reported MAB
+results.
+The MAB is made up of five distinct phases:
+.sp
+.ip "1." 10
+Make five directories (no significant cost)
+.ip "2." 10
+Copy a file system subtree to a working directory
+.ip "3." 10
+Get file attributes (stat) of all the working files
+.ip "4." 10
+Search for strings (grep) in the files
+.ip "5." 10
+Compile a library of C sources and archive them
+.lp
+Of the five phases, the fifth is by far the largest and is the one affected most
+by client caching mechanisms.
+The results for phase #1 are invariant over all
+the caching mechanisms.
+.sh 2 "Buffer Cache Size Tests"
+.pp
+The first experiment was done to see what effect changing the size of the
+buffer cache would have on client performance. A single DECstation 5000/25
+was used to do a series of runs of MAB with different buffer cache sizes
+for four variations of the file system protocol. The four variations are
+as follows:
+.ip "Case 1:" 10
+NFS - The NFS protocol as implemented in 4.4BSD
+.ip "Case 2:" 10
+Leases - The NQNFS protocol using leases for cache consistency
+.ip "Case 3:" 10
+Leases, Rdirlookup - The NQNFS protocol using leases for cache consistency
+and with the readdir RPC replaced by Readdir_and_Lookup
+.ip "Case 4:" 10
+Leases, Attrib leases, Rdirlookup - The NQNFS protocol using leases for
+cache consistency, with the readdir
+RPC replaced by the Readdir_and_Lookup,
+and requiring a valid lease not only for file-data access, but also for file-attribute access.
+.lp
+As can be seen in figure 1, the buffer cache achieves about optimal
+performance for the range of two to ten megabytes in size. At eleven
+megabytes in size, the system pages heavily and the runs did not
+complete in a reasonable time. Even at 64Kbytes, the buffer cache improves
+performance over no buffer cache by a significant margin of 136-148 seconds
+versus 239 seconds.
+This may be due, in part, to the fact that the Compile Phase of the MAB
+uses a rather small working set of file data.
+All variants of NQNFS achieve about
+the same performance, running around 30% faster than NFS, with a slightly
+larger difference for large buffer cache sizes.
+Based on these results, all remaining tests were run with the buffer cache
+size set to 4Mbytes.
+Although I do not know what causes the local peak in the curves between 0.5 and 2 megabytes,
+there is some indication that contention for buffer cache blocks, between the update process
+(which pushes delayed writes to the server every thirty seconds) and the I/O
+system calls, may be involved.
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.188 to 0.963,8.188
+line from 4.787,8.188 to 4.725,8.188
+line from 0.900,8.488 to 0.963,8.488
+line from 4.787,8.488 to 4.725,8.488
+line from 0.900,8.775 to 0.963,8.775
+line from 4.787,8.775 to 4.725,8.775
+line from 0.900,9.075 to 0.963,9.075
+line from 4.787,9.075 to 4.725,9.075
+line from 0.900,9.375 to 0.963,9.375
+line from 4.787,9.375 to 4.725,9.375
+line from 0.900,9.675 to 0.963,9.675
+line from 4.787,9.675 to 4.725,9.675
+line from 0.900,9.963 to 0.963,9.963
+line from 4.787,9.963 to 4.725,9.963
+line from 0.900,10.262 to 0.963,10.262
+line from 4.787,10.262 to 4.725,10.262
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.613,7.888 to 1.613,7.950
+line from 1.613,10.262 to 1.613,10.200
+line from 2.312,7.888 to 2.312,7.950
+line from 2.312,10.262 to 2.312,10.200
+line from 3.025,7.888 to 3.025,7.950
+line from 3.025,10.262 to 3.025,10.200
+line from 3.725,7.888 to 3.725,7.950
+line from 3.725,10.262 to 3.725,10.200
+line from 4.438,7.888 to 4.438,7.950
+line from 4.438,10.262 to 4.438,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 3.800,8.775 to 4.025,8.775
+line from 0.925,10.088 to 0.925,10.088
+line from 0.925,10.088 to 0.938,9.812
+line from 0.938,9.812 to 0.988,9.825
+line from 0.988,9.825 to 1.075,9.838
+line from 1.075,9.838 to 1.163,9.938
+line from 1.163,9.938 to 1.250,9.838
+line from 1.250,9.838 to 1.613,9.825
+line from 1.613,9.825 to 2.312,9.750
+line from 2.312,9.750 to 3.025,9.713
+line from 3.025,9.713 to 3.725,9.850
+line from 3.725,9.850 to 4.438,9.875
+dashwid = 0.037i
+line dotted from 3.800,8.625 to 4.025,8.625
+line dotted from 0.925,9.912 to 0.925,9.912
+line dotted from 0.925,9.912 to 0.938,9.887
+line dotted from 0.938,9.887 to 0.988,9.713
+line dotted from 0.988,9.713 to 1.075,9.562
+line dotted from 1.075,9.562 to 1.163,9.562
+line dotted from 1.163,9.562 to 1.250,9.562
+line dotted from 1.250,9.562 to 1.613,9.675
+line dotted from 1.613,9.675 to 2.312,9.363
+line dotted from 2.312,9.363 to 3.025,9.375
+line dotted from 3.025,9.375 to 3.725,9.387
+line dotted from 3.725,9.387 to 4.438,9.450
+line dashed from 3.800,8.475 to 4.025,8.475
+line dashed from 0.925,10.000 to 0.925,10.000
+line dashed from 0.925,10.000 to 0.938,9.787
+line dashed from 0.938,9.787 to 0.988,9.650
+line dashed from 0.988,9.650 to 1.075,9.537
+line dashed from 1.075,9.537 to 1.163,9.613
+line dashed from 1.163,9.613 to 1.250,9.800
+line dashed from 1.250,9.800 to 1.613,9.488
+line dashed from 1.613,9.488 to 2.312,9.375
+line dashed from 2.312,9.375 to 3.025,9.363
+line dashed from 3.025,9.363 to 3.725,9.325
+line dashed from 3.725,9.325 to 4.438,9.438
+dashwid = 0.075i
+line dotted from 3.800,8.325 to 4.025,8.325
+line dotted from 0.925,9.963 to 0.925,9.963
+line dotted from 0.925,9.963 to 0.938,9.750
+line dotted from 0.938,9.750 to 0.988,9.662
+line dotted from 0.988,9.662 to 1.075,9.613
+line dotted from 1.075,9.613 to 1.163,9.613
+line dotted from 1.163,9.613 to 1.250,9.700
+line dotted from 1.250,9.700 to 1.613,9.438
+line dotted from 1.613,9.438 to 2.312,9.463
+line dotted from 2.312,9.463 to 3.025,9.312
+line dotted from 3.025,9.312 to 3.725,9.387
+line dotted from 3.725,9.387 to 4.438,9.425
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"20" at 0.825,8.110 rjust
+"40" at 0.825,8.410 rjust
+"60" at 0.825,8.697 rjust
+"80" at 0.825,8.997 rjust
+"100" at 0.825,9.297 rjust
+"120" at 0.825,9.597 rjust
+"140" at 0.825,9.885 rjust
+"160" at 0.825,10.185 rjust
+"0" at 0.900,7.660
+"2" at 1.613,7.660
+"4" at 2.312,7.660
+"6" at 3.025,7.660
+"8" at 3.725,7.660
+"10" at 4.438,7.660
+"Time (sec)" at 0.150,8.997
+"Buffer Cache Size (MBytes)" at 2.837,7.510
+"Figure #1: MAB Phase 5 (compile)" at 2.837,10.335
+"NFS" at 3.725,8.697 rjust
+"Leases" at 3.725,8.547 rjust
+"Leases, Rdirlookup" at 3.725,8.397 rjust
+"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
+.ps
+.ft
+.PE
+.)z
+.sh 2 "Multiple Client Load Tests"
+.pp
+During preliminary runs of the MAB, it was observed that the server RPC
+counts were reduced significantly by NQNFS as compared to NFS (table 1).
+(Spritely NFS and Ultrix\(tm4.3/NFS numbers were taken from [Mogul93]
+and are not directly comparable, due to numerous differences in the
+experimental setup including deletion of the load step from phase 5.)
+This suggests
+that the NQNFS protocol might scale better with
+respect to the number of clients accessing the server.
+The experiment described in this section
+ran the MAB concurrently on one to ten clients, to observe the
+effects of heavier server load.
+The clients were started at roughly the same time by pressing all the
+<return> keys together and, although not synchronized beyond that point,
+all clients would finish the test run within about two seconds of each
+other.
+This was not a realistic load of N active clients, but it did
+result in a reproducible increasing client load on the server.
+The results for the four variants
+are plotted in figures 2-5.
+.(z
+.ps -1
+.R
+.TS
+box, center;
+c s s s s s s s
+c c c c c c c c
+l | n n n n n n n.
+Table #1: MAB RPC Counts
+RPC Getattr Read Write Lookup Other GetLease/Open-Close Total
+_
+BSD/NQNFS 277 139 306 575 294 127 1718
+BSD/NFS 1210 506 451 489 238 0 2894
+Spritely NFS 259 836 192 535 306 1467 3595
+Ultrix4.3/NFS 1225 1186 476 810 305 0 4002
+.TE
+.ps
+.)z
+.pp
+For the MAB benchmark, the NQNFS protocol reduces the RPC counts significantly,
+but with a minimum of extra overhead (the GetLease/Open-Close count).
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.225 to 0.963,8.225
+line from 4.787,8.225 to 4.725,8.225
+line from 0.900,8.562 to 0.963,8.562
+line from 4.787,8.562 to 4.725,8.562
+line from 0.900,8.900 to 0.963,8.900
+line from 4.787,8.900 to 4.725,8.900
+line from 0.900,9.250 to 0.963,9.250
+line from 4.787,9.250 to 4.725,9.250
+line from 0.900,9.588 to 0.963,9.588
+line from 4.787,9.588 to 4.725,9.588
+line from 0.900,9.925 to 0.963,9.925
+line from 4.787,9.925 to 4.725,9.925
+line from 0.900,10.262 to 0.963,10.262
+line from 4.787,10.262 to 4.725,10.262
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.613,7.888 to 1.613,7.950
+line from 1.613,10.262 to 1.613,10.200
+line from 2.312,7.888 to 2.312,7.950
+line from 2.312,10.262 to 2.312,10.200
+line from 3.025,7.888 to 3.025,7.950
+line from 3.025,10.262 to 3.025,10.200
+line from 3.725,7.888 to 3.725,7.950
+line from 3.725,10.262 to 3.725,10.200
+line from 4.438,7.888 to 4.438,7.950
+line from 4.438,10.262 to 4.438,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 3.800,8.900 to 4.025,8.900
+line from 1.250,8.325 to 1.250,8.325
+line from 1.250,8.325 to 1.613,8.500
+line from 1.613,8.500 to 2.312,8.825
+line from 2.312,8.825 to 3.025,9.175
+line from 3.025,9.175 to 3.725,9.613
+line from 3.725,9.613 to 4.438,10.012
+dashwid = 0.037i
+line dotted from 3.800,8.750 to 4.025,8.750
+line dotted from 1.250,8.275 to 1.250,8.275
+line dotted from 1.250,8.275 to 1.613,8.412
+line dotted from 1.613,8.412 to 2.312,8.562
+line dotted from 2.312,8.562 to 3.025,9.088
+line dotted from 3.025,9.088 to 3.725,9.375
+line dotted from 3.725,9.375 to 4.438,10.000
+line dashed from 3.800,8.600 to 4.025,8.600
+line dashed from 1.250,8.250 to 1.250,8.250
+line dashed from 1.250,8.250 to 1.613,8.438
+line dashed from 1.613,8.438 to 2.312,8.637
+line dashed from 2.312,8.637 to 3.025,9.088
+line dashed from 3.025,9.088 to 3.725,9.525
+line dashed from 3.725,9.525 to 4.438,10.075
+dashwid = 0.075i
+line dotted from 3.800,8.450 to 4.025,8.450
+line dotted from 1.250,8.262 to 1.250,8.262
+line dotted from 1.250,8.262 to 1.613,8.425
+line dotted from 1.613,8.425 to 2.312,8.613
+line dotted from 2.312,8.613 to 3.025,9.137
+line dotted from 3.025,9.137 to 3.725,9.512
+line dotted from 3.725,9.512 to 4.438,9.988
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"20" at 0.825,8.147 rjust
+"40" at 0.825,8.485 rjust
+"60" at 0.825,8.822 rjust
+"80" at 0.825,9.172 rjust
+"100" at 0.825,9.510 rjust
+"120" at 0.825,9.847 rjust
+"140" at 0.825,10.185 rjust
+"0" at 0.900,7.660
+"2" at 1.613,7.660
+"4" at 2.312,7.660
+"6" at 3.025,7.660
+"8" at 3.725,7.660
+"10" at 4.438,7.660
+"Time (sec)" at 0.150,8.997
+"Number of Clients" at 2.837,7.510
+"Figure #2: MAB Phase 2 (copying)" at 2.837,10.335
+"NFS" at 3.725,8.822 rjust
+"Leases" at 3.725,8.672 rjust
+"Leases, Rdirlookup" at 3.725,8.522 rjust
+"Leases, Attrib leases, Rdirlookup" at 3.725,8.372 rjust
+.ps
+.ft
+.PE
+.)z
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.188 to 0.963,8.188
+line from 4.787,8.188 to 4.725,8.188
+line from 0.900,8.488 to 0.963,8.488
+line from 4.787,8.488 to 4.725,8.488
+line from 0.900,8.775 to 0.963,8.775
+line from 4.787,8.775 to 4.725,8.775
+line from 0.900,9.075 to 0.963,9.075
+line from 4.787,9.075 to 4.725,9.075
+line from 0.900,9.375 to 0.963,9.375
+line from 4.787,9.375 to 4.725,9.375
+line from 0.900,9.675 to 0.963,9.675
+line from 4.787,9.675 to 4.725,9.675
+line from 0.900,9.963 to 0.963,9.963
+line from 4.787,9.963 to 4.725,9.963
+line from 0.900,10.262 to 0.963,10.262
+line from 4.787,10.262 to 4.725,10.262
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.613,7.888 to 1.613,7.950
+line from 1.613,10.262 to 1.613,10.200
+line from 2.312,7.888 to 2.312,7.950
+line from 2.312,10.262 to 2.312,10.200
+line from 3.025,7.888 to 3.025,7.950
+line from 3.025,10.262 to 3.025,10.200
+line from 3.725,7.888 to 3.725,7.950
+line from 3.725,10.262 to 3.725,10.200
+line from 4.438,7.888 to 4.438,7.950
+line from 4.438,10.262 to 4.438,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 3.800,8.775 to 4.025,8.775
+line from 1.250,8.975 to 1.250,8.975
+line from 1.250,8.975 to 1.613,8.963
+line from 1.613,8.963 to 2.312,8.988
+line from 2.312,8.988 to 3.025,9.037
+line from 3.025,9.037 to 3.725,9.062
+line from 3.725,9.062 to 4.438,9.100
+dashwid = 0.037i
+line dotted from 3.800,8.625 to 4.025,8.625
+line dotted from 1.250,9.312 to 1.250,9.312
+line dotted from 1.250,9.312 to 1.613,9.287
+line dotted from 1.613,9.287 to 2.312,9.675
+line dotted from 2.312,9.675 to 3.025,9.262
+line dotted from 3.025,9.262 to 3.725,9.738
+line dotted from 3.725,9.738 to 4.438,9.512
+line dashed from 3.800,8.475 to 4.025,8.475
+line dashed from 1.250,9.400 to 1.250,9.400
+line dashed from 1.250,9.400 to 1.613,9.287
+line dashed from 1.613,9.287 to 2.312,9.575
+line dashed from 2.312,9.575 to 3.025,9.300
+line dashed from 3.025,9.300 to 3.725,9.613
+line dashed from 3.725,9.613 to 4.438,9.512
+dashwid = 0.075i
+line dotted from 3.800,8.325 to 4.025,8.325
+line dotted from 1.250,9.400 to 1.250,9.400
+line dotted from 1.250,9.400 to 1.613,9.412
+line dotted from 1.613,9.412 to 2.312,9.700
+line dotted from 2.312,9.700 to 3.025,9.537
+line dotted from 3.025,9.537 to 3.725,9.938
+line dotted from 3.725,9.938 to 4.438,9.812
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"5" at 0.825,8.110 rjust
+"10" at 0.825,8.410 rjust
+"15" at 0.825,8.697 rjust
+"20" at 0.825,8.997 rjust
+"25" at 0.825,9.297 rjust
+"30" at 0.825,9.597 rjust
+"35" at 0.825,9.885 rjust
+"40" at 0.825,10.185 rjust
+"0" at 0.900,7.660
+"2" at 1.613,7.660
+"4" at 2.312,7.660
+"6" at 3.025,7.660
+"8" at 3.725,7.660
+"10" at 4.438,7.660
+"Time (sec)" at 0.150,8.997
+"Number of Clients" at 2.837,7.510
+"Figure #3: MAB Phase 3 (stat/find)" at 2.837,10.335
+"NFS" at 3.725,8.697 rjust
+"Leases" at 3.725,8.547 rjust
+"Leases, Rdirlookup" at 3.725,8.397 rjust
+"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
+.ps
+.ft
+.PE
+.)z
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.188 to 0.963,8.188
+line from 4.787,8.188 to 4.725,8.188
+line from 0.900,8.488 to 0.963,8.488
+line from 4.787,8.488 to 4.725,8.488
+line from 0.900,8.775 to 0.963,8.775
+line from 4.787,8.775 to 4.725,8.775
+line from 0.900,9.075 to 0.963,9.075
+line from 4.787,9.075 to 4.725,9.075
+line from 0.900,9.375 to 0.963,9.375
+line from 4.787,9.375 to 4.725,9.375
+line from 0.900,9.675 to 0.963,9.675
+line from 4.787,9.675 to 4.725,9.675
+line from 0.900,9.963 to 0.963,9.963
+line from 4.787,9.963 to 4.725,9.963
+line from 0.900,10.262 to 0.963,10.262
+line from 4.787,10.262 to 4.725,10.262
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.613,7.888 to 1.613,7.950
+line from 1.613,10.262 to 1.613,10.200
+line from 2.312,7.888 to 2.312,7.950
+line from 2.312,10.262 to 2.312,10.200
+line from 3.025,7.888 to 3.025,7.950
+line from 3.025,10.262 to 3.025,10.200
+line from 3.725,7.888 to 3.725,7.950
+line from 3.725,10.262 to 3.725,10.200
+line from 4.438,7.888 to 4.438,7.950
+line from 4.438,10.262 to 4.438,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 3.800,8.775 to 4.025,8.775
+line from 1.250,9.412 to 1.250,9.412
+line from 1.250,9.412 to 1.613,9.425
+line from 1.613,9.425 to 2.312,9.463
+line from 2.312,9.463 to 3.025,9.600
+line from 3.025,9.600 to 3.725,9.875
+line from 3.725,9.875 to 4.438,10.075
+dashwid = 0.037i
+line dotted from 3.800,8.625 to 4.025,8.625
+line dotted from 1.250,9.450 to 1.250,9.450
+line dotted from 1.250,9.450 to 1.613,9.438
+line dotted from 1.613,9.438 to 2.312,9.438
+line dotted from 2.312,9.438 to 3.025,9.525
+line dotted from 3.025,9.525 to 3.725,9.550
+line dotted from 3.725,9.550 to 4.438,9.662
+line dashed from 3.800,8.475 to 4.025,8.475
+line dashed from 1.250,9.438 to 1.250,9.438
+line dashed from 1.250,9.438 to 1.613,9.412
+line dashed from 1.613,9.412 to 2.312,9.450
+line dashed from 2.312,9.450 to 3.025,9.500
+line dashed from 3.025,9.500 to 3.725,9.613
+line dashed from 3.725,9.613 to 4.438,9.675
+dashwid = 0.075i
+line dotted from 3.800,8.325 to 4.025,8.325
+line dotted from 1.250,9.387 to 1.250,9.387
+line dotted from 1.250,9.387 to 1.613,9.600
+line dotted from 1.613,9.600 to 2.312,9.625
+line dotted from 2.312,9.625 to 3.025,9.738
+line dotted from 3.025,9.738 to 3.725,9.850
+line dotted from 3.725,9.850 to 4.438,9.800
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"5" at 0.825,8.110 rjust
+"10" at 0.825,8.410 rjust
+"15" at 0.825,8.697 rjust
+"20" at 0.825,8.997 rjust
+"25" at 0.825,9.297 rjust
+"30" at 0.825,9.597 rjust
+"35" at 0.825,9.885 rjust
+"40" at 0.825,10.185 rjust
+"0" at 0.900,7.660
+"2" at 1.613,7.660
+"4" at 2.312,7.660
+"6" at 3.025,7.660
+"8" at 3.725,7.660
+"10" at 4.438,7.660
+"Time (sec)" at 0.150,8.997
+"Number of Clients" at 2.837,7.510
+"Figure #4: MAB Phase 4 (grep/wc/find)" at 2.837,10.335
+"NFS" at 3.725,8.697 rjust
+"Leases" at 3.725,8.547 rjust
+"Leases, Rdirlookup" at 3.725,8.397 rjust
+"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
+.ps
+.ft
+.PE
+.)z
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.150 to 0.963,8.150
+line from 4.787,8.150 to 4.725,8.150
+line from 0.900,8.412 to 0.963,8.412
+line from 4.787,8.412 to 4.725,8.412
+line from 0.900,8.675 to 0.963,8.675
+line from 4.787,8.675 to 4.725,8.675
+line from 0.900,8.938 to 0.963,8.938
+line from 4.787,8.938 to 4.725,8.938
+line from 0.900,9.213 to 0.963,9.213
+line from 4.787,9.213 to 4.725,9.213
+line from 0.900,9.475 to 0.963,9.475
+line from 4.787,9.475 to 4.725,9.475
+line from 0.900,9.738 to 0.963,9.738
+line from 4.787,9.738 to 4.725,9.738
+line from 0.900,10.000 to 0.963,10.000
+line from 4.787,10.000 to 4.725,10.000
+line from 0.900,10.262 to 0.963,10.262
+line from 4.787,10.262 to 4.725,10.262
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.613,7.888 to 1.613,7.950
+line from 1.613,10.262 to 1.613,10.200
+line from 2.312,7.888 to 2.312,7.950
+line from 2.312,10.262 to 2.312,10.200
+line from 3.025,7.888 to 3.025,7.950
+line from 3.025,10.262 to 3.025,10.200
+line from 3.725,7.888 to 3.725,7.950
+line from 3.725,10.262 to 3.725,10.200
+line from 4.438,7.888 to 4.438,7.950
+line from 4.438,10.262 to 4.438,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 3.800,8.675 to 4.025,8.675
+line from 1.250,8.800 to 1.250,8.800
+line from 1.250,8.800 to 1.613,8.912
+line from 1.613,8.912 to 2.312,9.113
+line from 2.312,9.113 to 3.025,9.438
+line from 3.025,9.438 to 3.725,9.750
+line from 3.725,9.750 to 4.438,10.088
+dashwid = 0.037i
+line dotted from 3.800,8.525 to 4.025,8.525
+line dotted from 1.250,8.637 to 1.250,8.637
+line dotted from 1.250,8.637 to 1.613,8.700
+line dotted from 1.613,8.700 to 2.312,8.713
+line dotted from 2.312,8.713 to 3.025,8.775
+line dotted from 3.025,8.775 to 3.725,8.887
+line dotted from 3.725,8.887 to 4.438,9.037
+line dashed from 3.800,8.375 to 4.025,8.375
+line dashed from 1.250,8.675 to 1.250,8.675
+line dashed from 1.250,8.675 to 1.613,8.688
+line dashed from 1.613,8.688 to 2.312,8.713
+line dashed from 2.312,8.713 to 3.025,8.825
+line dashed from 3.025,8.825 to 3.725,8.887
+line dashed from 3.725,8.887 to 4.438,9.062
+dashwid = 0.075i
+line dotted from 3.800,8.225 to 4.025,8.225
+line dotted from 1.250,8.700 to 1.250,8.700
+line dotted from 1.250,8.700 to 1.613,8.688
+line dotted from 1.613,8.688 to 2.312,8.762
+line dotted from 2.312,8.762 to 3.025,8.812
+line dotted from 3.025,8.812 to 3.725,8.925
+line dotted from 3.725,8.925 to 4.438,9.025
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"50" at 0.825,8.072 rjust
+"100" at 0.825,8.335 rjust
+"150" at 0.825,8.597 rjust
+"200" at 0.825,8.860 rjust
+"250" at 0.825,9.135 rjust
+"300" at 0.825,9.397 rjust
+"350" at 0.825,9.660 rjust
+"400" at 0.825,9.922 rjust
+"450" at 0.825,10.185 rjust
+"0" at 0.900,7.660
+"2" at 1.613,7.660
+"4" at 2.312,7.660
+"6" at 3.025,7.660
+"8" at 3.725,7.660
+"10" at 4.438,7.660
+"Time (sec)" at 0.150,8.997
+"Number of Clients" at 2.837,7.510
+"Figure #5: MAB Phase 5 (compile)" at 2.837,10.335
+"NFS" at 3.725,8.597 rjust
+"Leases" at 3.725,8.447 rjust
+"Leases, Rdirlookup" at 3.725,8.297 rjust
+"Leases, Attrib leases, Rdirlookup" at 3.725,8.147 rjust
+.ps
+.ft
+.PE
+.)z
+.pp
+In figure 2, where a subtree of seventy small files is copied, the difference between the protocol variants is minimal,
+with the NQNFS variants performing slightly better.
+For this case, the Readdir_and_Lookup RPC is a slight hindrance under heavy
+load, possibly because it results in larger directory blocks in the buffer
+cache.
+.pp
+In figure 3, for the phase that gets file attributes for a large number
+of files, the leasing variants take about 50% longer, indicating that
+there are performance problems in this area. For the case where valid
+current leases are required for every file when attributes are returned,
+the performance is significantly worse than when the attributes are allowed
+to be stale by a few seconds on the client.
+I have not been able to explain the oscillation in the curves for the
+Lease cases.
+.pp
+For the string searching phase depicted in figure 4, the leasing variants
+that do not require valid leases for files when attributes are returned
+appear to scale better with server load than NFS.
+However, the effect appears to be
+negligible until the server load is fairly heavy.
+.pp
+Most of the time in the MAB benchmark is spent in the compilation phase
+and this is where the differences between caching methods are most
+pronounced.
+In figure 5 it can be seen that any protocol variant using Leases performs
+about a factor of two better than NFS
+at a load of ten clients. This indicates that the use of NQNFS may
+allow servers to handle significantly more clients for this type of
+workload.
+.pp
+Table 2 summarizes the MAB run times for all phases for the single client
+DECstation 5000/25. The \fILeases\fR case refers to using leases, whereas
+the \fILeases, Rdirl\fR case uses the Readdir_and_Lookup RPC as well and
+the \fIBCache Only\fR case uses leases, but only the buffer cache and not
+the attribute or name caches.
+The \fINo Caching\fR case does not do any client side caching, performing
+all system calls via synchronous RPCs to the server.
+.(z
+.ps -1
+.R
+.TS
+box, center;
+c s s s s s s s
+c c c c c c c c
+l | n n n n n n n.
+Table #2: Single DECstation 5000/25 Client Elapsed Times (sec)
+Phase 1 2 3 4 5 Total % Improvement
+_
+No Caching 6 35 41 40 258 380 -93
+NFS 5 24 15 20 133 197 0
+BCache Only 5 20 24 23 116 188 5
+Leases, Rdirl 5 20 21 20 105 171 13
+Leases 5 19 21 21 99 165 16
+.TE
+.ps
+.)z
+.sh 2 "Processor Speed Tests"
+.pp
+An important goal of client-side file system caching is to decouple the
+I/O system calls from the underlying distributed file system, so that the
+client's system performance might scale with processor speed. In order
+to test this, a series of MAB runs were performed on three
+DECstations that are similar except for processor speed.
+In addition to the four protocol variants used for the above tests, runs
+were done with the client caches turned off, for
+worst case performance numbers for caching mechanisms with a 100% miss rate. The CPU utilization
+was measured, as an indicator of how much the processor was blocking for
+I/O system calls. Note that since the systems were running in single user mode
+and otherwise quiescent, almost all CPU activity was directly related
+to the MAB run.
+The results are presented in
+table 3.
+The CPU time is simply the product of the CPU utilization and
+elapsed running time and, as such, is the optimistic bound on performance
+achievable with an ideal client caching scheme that never blocks for I/O.
+.(z
+.ps -1
+.R
+.TS
+box, center;
+c s s s s s s s s s
+c c s s c s s c s s
+c c c c c c c c c c
+c c c c c c c c c c
+l | n n n n n n n n n.
+Table #3: MAB Phase 5 (compile)
+ DS2100 (10.5 MIPS) DS3100 (14.0 MIPS) DS5000/25 (26.7 MIPS)
+ Elapsed CPU CPU Elapsed CPU CPU Elapsed CPU CPU
+ time Util(%) time time Util(%) time time Util(%) time
+_
+Leases 143 89 127 113 87 98 99 89 88
+Leases, Rdirl 150 89 134 110 91 100 105 88 92
+BCache Only 169 85 144 129 78 101 116 75 87
+NFS 172 77 132 135 74 100 133 71 94
+No Caching 330 47 155 256 41 105 258 39 101
+.TE
+.ps
+.)z
+As can be seen in the table, any caching mechanism achieves significantly
+better performance than when caching is disabled, roughly doubling the CPU
+utilization with a corresponding reduction in run time. For NFS, the CPU
+utilization is dropping with increase in CPU speed, which would suggest that
+it is not scaling with CPU speed. For the NQNFS variants, the CPU utilization
+remains at just below 90%, which suggests that the caching mechanism is working
+well and scaling within this CPU range.
+Note that for this benchmark, the ratio of CPU times for
+the DECstation 3100 and DECstation 5000/25 is quite different than the
+Dhrystone MIPS ratings would suggest.
+.pp
+Overall, the results seem encouraging, although it remains to be seen whether
+or not the caching provided by NQNFS can continue to scale with CPU
+performance.
+There is a good indication that NQNFS permits a server to scale
+to more clients than does NFS, at least for workloads akin to the MAB compile phase.
+A more difficult question is "What if the server is much faster doing
+write RPCs?" as a result of some technology such as Prestoserve
+or write gathering.
+Since a significant part of the difference between NFS and NQNFS is
+the synchronous writing, it is difficult to predict how much a server
+capable of fast write RPCs will negate the performance improvements of NQNFS.
+At the very least, table 1 indicates that the write RPC load on the server
+has decreased by approximately 30%, and this reduced write load should still
+result in some improvement.
+.pp
+Indications are that the Readdir_and_Lookup RPC has not improved performance
+for these tests and may in fact be degrading performance slightly.
+The results in figure 3 indicate some problems, possibly with handling
+of the attribute cache. It seems logical that the Readdir_and_Lookup RPC
+should permit priming of the attribute cache, improving hit rate, but the
+results are counter to that.
+.sh 2 "Internetwork Delay Tests"
+.pp
+This experimental setup was used to explore how the different protocol
+variants might perform over internetworks with larger RPC RTTs. The
+server was moved to a separate Ethernet, using a MicroVAXII\(tm as an
+IP router to the other Ethernet. The 4.3Reno BSD Unix system running on the
+MicroVAXII was modified to delay IP packets being forwarded by a tunable N
+millisecond delay. The implementation was rather crude and did not try to
+simulate a distribution of delay times nor was it programmed to drop packets
+at a given rate, but it served as a simple emulation of a long,
+fat network\** [Jacobson88].
+.(f
+\**Long fat networks refer to network interconnections with
+a Bandwidth X RTT product > 10\u5\d bits.
+.)f
+The MAB was run using both UDP and TCP RPC transports
+for a variety of RTT delays from five to two hundred milliseconds,
+to observe the effects of RTT delay on RPC transport.
+It was found that, due to a high variability between runs, four runs did not
+suffice, so eight runs at each value were done.
+The results in figure 6 and table 4 are the average for the eight runs.
+.(z
+.PS
+.ps
+.ps 10
+dashwid = 0.050i
+line dashed from 0.900,7.888 to 4.787,7.888
+line dashed from 0.900,7.888 to 0.900,10.262
+line from 0.900,7.888 to 0.963,7.888
+line from 4.787,7.888 to 4.725,7.888
+line from 0.900,8.350 to 0.963,8.350
+line from 4.787,8.350 to 4.725,8.350
+line from 0.900,8.800 to 0.963,8.800
+line from 4.787,8.800 to 4.725,8.800
+line from 0.900,9.262 to 0.963,9.262
+line from 4.787,9.262 to 4.725,9.262
+line from 0.900,9.713 to 0.963,9.713
+line from 4.787,9.713 to 4.725,9.713
+line from 0.900,10.175 to 0.963,10.175
+line from 4.787,10.175 to 4.725,10.175
+line from 0.900,7.888 to 0.900,7.950
+line from 0.900,10.262 to 0.900,10.200
+line from 1.825,7.888 to 1.825,7.950
+line from 1.825,10.262 to 1.825,10.200
+line from 2.750,7.888 to 2.750,7.950
+line from 2.750,10.262 to 2.750,10.200
+line from 3.675,7.888 to 3.675,7.950
+line from 3.675,10.262 to 3.675,10.200
+line from 4.600,7.888 to 4.600,7.950
+line from 4.600,10.262 to 4.600,10.200
+line from 0.900,7.888 to 4.787,7.888
+line from 4.787,7.888 to 4.787,10.262
+line from 4.787,10.262 to 0.900,10.262
+line from 0.900,10.262 to 0.900,7.888
+line from 4.125,8.613 to 4.350,8.613
+line from 0.988,8.400 to 0.988,8.400
+line from 0.988,8.400 to 1.637,8.575
+line from 1.637,8.575 to 2.375,8.713
+line from 2.375,8.713 to 3.125,8.900
+line from 3.125,8.900 to 3.862,9.137
+line from 3.862,9.137 to 4.600,9.425
+dashwid = 0.037i
+line dotted from 4.125,8.463 to 4.350,8.463
+line dotted from 0.988,8.375 to 0.988,8.375
+line dotted from 0.988,8.375 to 1.637,8.525
+line dotted from 1.637,8.525 to 2.375,8.850
+line dotted from 2.375,8.850 to 3.125,8.975
+line dotted from 3.125,8.975 to 3.862,9.137
+line dotted from 3.862,9.137 to 4.600,9.625
+line dashed from 4.125,8.312 to 4.350,8.312
+line dashed from 0.988,8.525 to 0.988,8.525
+line dashed from 0.988,8.525 to 1.637,8.688
+line dashed from 1.637,8.688 to 2.375,8.838
+line dashed from 2.375,8.838 to 3.125,9.150
+line dashed from 3.125,9.150 to 3.862,9.275
+line dashed from 3.862,9.275 to 4.600,9.588
+dashwid = 0.075i
+line dotted from 4.125,8.162 to 4.350,8.162
+line dotted from 0.988,8.525 to 0.988,8.525
+line dotted from 0.988,8.525 to 1.637,8.838
+line dotted from 1.637,8.838 to 2.375,8.863
+line dotted from 2.375,8.863 to 3.125,9.137
+line dotted from 3.125,9.137 to 3.862,9.387
+line dotted from 3.862,9.387 to 4.600,10.200
+.ps
+.ps -1
+.ft
+.ft I
+"0" at 0.825,7.810 rjust
+"100" at 0.825,8.272 rjust
+"200" at 0.825,8.722 rjust
+"300" at 0.825,9.185 rjust
+"400" at 0.825,9.635 rjust
+"500" at 0.825,10.097 rjust
+"0" at 0.900,7.660
+"50" at 1.825,7.660
+"100" at 2.750,7.660
+"150" at 3.675,7.660
+"200" at 4.600,7.660
+"Time (sec)" at 0.150,8.997
+"Round Trip Delay (msec)" at 2.837,7.510
+"Figure #6: MAB Phase 5 (compile)" at 2.837,10.335
+"Leases,UDP" at 4.050,8.535 rjust
+"Leases,TCP" at 4.050,8.385 rjust
+"NFS,UDP" at 4.050,8.235 rjust
+"NFS,TCP" at 4.050,8.085 rjust
+.ps
+.ft
+.PE
+.)z
+.(z
+.ps -1
+.R
+.TS
+box, center;
+c s s s s s s s s
+c c s c s c s c s
+c c c c c c c c c
+c c c c c c c c c
+l | n n n n n n n n.
+Table #4: MAB Phase 5 (compile) for Internetwork Delays
+ NFS,UDP NFS,TCP Leases,UDP Leases,TCP
+Delay Elapsed Standard Elapsed Standard Elapsed Standard Elapsed Standard
+(msec) time (sec) Deviation time (sec) Deviation time (sec) Deviation time (sec) Deviation
+_
+5 139 2.9 139 2.4 112 7.0 108 6.0
+40 175 5.1 208 44.5 150 23.8 139 4.3
+80 207 3.9 213 4.7 180 7.7 210 52.9
+120 276 29.3 273 17.1 221 7.7 238 5.8
+160 304 7.2 328 77.1 275 21.5 274 10.1
+200 372 35.0 506 235.1 338 25.2 379 69.2
+.TE
+.ps
+.)z
+.pp
+I found these results somewhat surprising, since I had assumed that stability
+across an internetwork connection would be a function of RPC transport
+protocol.
+Looking at the standard deviations observed between the eight runs, there is an indication
+that the NQNFS protocol plays a larger role in
+maintaining stability than the underlying RPC transport protocol.
+It appears that NFS over TCP transport
+is the least stable variant tested.
+It should be noted that the TCP implementation used was roughly at 4.3BSD Tahoe
+release and that the 4.4BSD TCP implementation was far less stable and would
+fail intermittently, due to a bug I was not able to isolate.
+It would appear that some of the recent enhancements to the 4.4BSD TCP
+implementation have a detrimental effect on the performance of
+RPC-type traffic loads, which intermix small and large
+data transfers in both directions.
+It is obvious that more exploration of this area is needed before any
+conclusions can be made
+beyond the fact that over a local area network, TCP transport provides
+performance comparable to UDP.
+.sh 1 "Lessons Learned"
+.pp
+Evaluating the performance of a distributed file system is fraught with
+difficulties, due to the many software and hardware factors involved.
+The limited benchmarking presented here took a considerable amount of time
+and the results gained by the exercise only give indications of what the
+performance might be for a few scenarios.
+.pp
+The IP router with delay introduction proved to be a valuable tool for protocol debugging\**,
+.(f
+\**It exposed two bugs in the 4.4BSD networking, one a problem in the Lance chip
+driver for the DECstation and the other a TCP window sizing problem that I was
+not able to isolate.
+.)f
+and may be useful for a more extensive study of performance over internetworks
+if enhanced to do a better job of simulating internetwork delay and packet loss.
+.pp
+The Leases mechanism provided a simple model for the provision of cache
+consistency and did seem to improve performance for various scenarios.
+Unfortunately, it does not provide the server state information that is required
+for file system semantics, such as locking, that many software systems demand.
+In production environments on my campus, the need for file locking and the correct
+generation of the ETXTBSY error code
+are far more important than full cache consistency, and leasing
+does not satisfy these needs.
+Another file system semantic that requires hard server state is the delay
+of file removal until the last close system call. Although Spritely NFS
+did not support this semantic either, it is logical that the open file
+state maintained by that system would facilitate the implementation of
+this semantic more easily than would the Leases mechanism.
+.sh 1 "Further Work"
+.pp
+The current implementation uses a fixed, moderate sized buffer cache designed
+for the local UFS [McKusick84] file system.
+The results in figure 1 suggest that this is adequate so long as the cache
+is of an appropriate size.
+However, a mechanism permitting the cache to vary in size
+has been shown to outperform fixed sized buffer caches [Nelson90], and could
+be beneficial. It could also be useful to allow the buffer cache to grow very
+large by making use of local backing store for cases where server performance
+is limited.
+A very large buffer cache size would in turn permit experimentation with
+much larger read/write data sizes, facilitating bulk data transfers
+across long fat networks, such as will characterize the Internet of the
+near future.
+A careful redesign of the buffer cache mechanism to provide
+support for these features would probably be the next implementation step.
+.pp
+The results in figure 3 indicate that the mechanics of caching file
+attributes and maintaining the attribute cache's consistency need to
+be looked at further.
+There also needs to be more work done on the interaction between a
+Readdir_and_Lookup RPC and the name and attribute caches, in an effort
+to reduce Getattr and Lookup RPC loads.
+.pp
+The NQNFS protocol has never been used in a production environment and doing
+so would provide needed insight into how well the protocol satisfies the
+needs of real workstation environments.
+It is hoped that the distribution of the implementation in 4.4BSD will
+facilitate use of the protocol in production environments elsewhere.
+.pp
+The big question that needs to be resolved is whether Leases are an adequate
+mechanism for cache consistency or whether hard server state is required.
+Given the work presented here and in the papers related to Sprite and Spritely
+NFS, there are clear indications that a cache consistency algorithm can
+improve both performance and file system semantics.
+As yet, however, it is unclear what the best approach to maintain consistency is.
+It would appear that hard state information is required for file locking and
+other mechanisms and, if so, it seems appropriate to use it for cache
+consistency as well.
+.sh 1 "Acknowledgements"
+.pp
+I would like to thank the members of the CSRG at the University of California,
+Berkeley for their continued support over the years. Without their encouragement and assistance this
+software would never have been implemented.
+Prof. Jim Linders and Prof. Tom Wilson here at the University of Guelph helped
+proofread this paper and Jeffrey Mogul provided a great deal of
+assistance, helping to turn my gibberish into something at least moderately
+readable.
+.sh 1 "References"
+.ip [Baker91] 15
+Mary Baker and John Ousterhout, Availability in the Sprite Distributed
+File System, In \fIOperating Systems Review\fR, (25)2, pg. 95-98,
+April 1991.
+.ip [Baker91a] 15
+Mary Baker, private communication, May 1991.
+.ip [Burrows88] 15
+Michael Burrows, Efficient Data Sharing, Technical Report #153,
+Computer Laboratory, University of Cambridge, Dec. 1988.
+.ip [Gray89] 15
+Cary G. Gray and David R. Cheriton, Leases: An Efficient Fault-Tolerant
+Mechanism for Distributed File Cache Consistency, In \fIProc. of the
+Twelfth ACM Symposium on Operating Systems Principles\fR, Litchfield Park,
+AZ, Dec. 1989.
+.ip [Howard88] 15
+John H. Howard, Michael L. Kazar, Sherri G. Menees, David A. Nichols,
+M. Satyanarayanan, Robert N. Sidebotham and Michael J. West,
+Scale and Performance in a Distributed File System, \fIACM Trans. on
+Computer Systems\fR, (6)1, pg 51-81, Feb. 1988.
+.ip [Jacobson88] 15
+Van Jacobson and R. Braden, \fITCP Extensions for Long-Delay Paths\fR,
+ARPANET Working Group Requests for Comment, DDN Network Information Center,
+SRI International, Menlo Park, CA, October 1988, RFC-1072.
+.ip [Jacobson89] 15
+Van Jacobson, Sun NFS Performance Problems, \fIPrivate Communication,\fR
+November, 1989.
+.ip [Juszczak89] 15
+Chet Juszczak, Improving the Performance and Correctness of an NFS Server,
+In \fIProc. Winter 1989 USENIX Conference,\fR pg. 53-63, San Diego, CA, January 1989.
+.ip [Juszczak94] 15
+Chet Juszczak, Improving the Write Performance of an NFS Server,
+to appear in \fIProc. Winter 1994 USENIX Conference,\fR San Francisco, CA, January 1994.
+.ip [Kazar88] 15
+Michael L. Kazar, Synchronization and Caching Issues in the Andrew File System,
+In \fIProc. Winter 1988 USENIX Conference,\fR pg. 27-36, Dallas, TX, February
+1988.
+.ip [Kent87] 15
+Christopher A. Kent and Jeffrey C. Mogul, \fIFragmentation Considered Harmful\fR, Research Report 87/3,
+Digital Equipment Corporation Western Research Laboratory, Dec. 1987.
+.ip [Kent87a] 15
+Christopher A. Kent, \fICache Coherence in Distributed Systems\fR, Research Report 87/4,
+Digital Equipment Corporation Western Research Laboratory, April 1987.
+.ip [Macklem90] 15
+Rick Macklem, Lessons Learned Tuning the 4.3BSD Reno Implementation of the
+NFS Protocol,
+In \fIProc. Winter 1991 USENIX Conference,\fR pg. 53-64, Dallas, TX,
+January 1991.
+.ip [Macklem93] 15
+Rick Macklem, The 4.4BSD NFS Implementation,
+In \fIThe System Manager's Manual\fR, 4.4 Berkeley Software Distribution,
+University of California, Berkeley, June 1993.
+.ip [McKusick84] 15
+Marshall K. McKusick, William N. Joy, Samuel J. Leffler and Robert S. Fabry,
+A Fast File System for UNIX, \fIACM Transactions on Computer Systems\fR,
+Vol. 2, Number 3, pg. 181-197, August 1984.
+.ip [McKusick90] 15
+Marshall K. McKusick, Michael J. Karels and Keith Bostic, A Pageable Memory
+Based Filesystem,
+In \fIProc. Summer 1990 USENIX Conference,\fR pg. 137-143, Anaheim, CA, June
+1990.
+.ip [Mogul93] 15
+Jeffrey C. Mogul, Recovery in Spritely NFS,
+Research Report 93/2, Digital Equipment Corporation Western Research
+Laboratory, June 1993.
+.ip [Moran90] 15
+Joseph Moran, Russel Sandberg, Don Coleman, Jonathan Kepecs and Bob Lyon,
+Breaking Through the NFS Performance Barrier,
+In \fIProc. Spring 1990 EUUG Conference,\fR pg. 199-206, Munich, FRG,
+April 1990.
+.ip [Nelson88] 15
+Michael N. Nelson, Brent B. Welch, and John K. Ousterhout, Caching in the
+Sprite Network File System, \fIACM Transactions on Computer Systems\fR (6)1
+pg. 134-154, February 1988.
+.ip [Nelson90] 15
+Michael N. Nelson, \fIVirtual Memory vs. The File System\fR, Research Report
+90/4, Digital Equipment Corporation Western Research Laboratory, March 1990.
+.ip [Nowicki89] 15
+Bill Nowicki, Transport Issues in the Network File System, In \fIComputer
+Communication Review\fR, pg. 16-20, March 1989.
+.ip [Ousterhout90] 15
+John K. Ousterhout, Why Aren't Operating Systems Getting Faster As Fast as
+Hardware? In \fIProc. Summer 1990 USENIX Conference\fR, pg. 247-256, Anaheim,
+CA, June 1990.
+.ip [Sandberg85] 15
+Russel Sandberg, David Goldberg, Steve Kleiman, Dan Walsh, and Bob Lyon,
+Design and Implementation of the Sun Network filesystem, In \fIProc. Summer
+1985 USENIX Conference\fR, pages 119-130, Portland, OR, June 1985.
+.ip [Srinivasan89] 15
+V. Srinivasan and Jeffrey C. Mogul, Spritely NFS: Experiments with
+Cache-Consistency Protocols,
+In \fIProc. of the
+Twelfth ACM Symposium on Operating Systems Principles\fR, Litchfield Park,
+AZ, Dec. 1989.
+.ip [Steiner88] 15
+J. G. Steiner, B. C. Neuman and J. I. Schiller, Kerberos: An Authentication
+Service for Open Network Systems,
+In \fIProc. Winter 1988 USENIX Conference,\fR pg. 191-202, Dallas, TX, February
+1988.
+.ip [SUN89] 15
+Sun Microsystems Inc., \fINFS: Network File System Protocol Specification\fR,
+ARPANET Working Group Requests for Comment, DDN Network Information Center,
+SRI International, Menlo Park, CA, March 1989, RFC-1094.
+.ip [SUN93] 15
+Sun Microsystems Inc., \fINFS: Network File System Version 3 Protocol Specification\fR,
+Sun Microsystems Inc., Mountain View, CA, June 1993.
+.ip [Wittle93] 15
+Mark Wittle and Bruce E. Keith, LADDIS: The Next Generation in NFS File
+Server Benchmarking,
+In \fIProc. Summer 1993 USENIX Conference,\fR pg. 111-128, Cincinnati, OH, June
+1993.
+.(f
+\(mo
+NFS is believed to be a trademark of Sun Microsystems, Inc.
+.)f
+.(f
+\(dg
+Prestoserve is a trademark of Legato Systems, Inc.
+.)f
+.(f
+\(sc
+MIPS is a trademark of Silicon Graphics, Inc.
+.)f
+.(f
+\(dg
+DECstation, MicroVAXII and Ultrix are trademarks of Digital Equipment Corp.
+.)f
+.(f
+\(dd
+Unix is a trademark of Novell, Inc.
+.)f
diff --git a/share/doc/papers/px/Makefile b/share/doc/papers/px/Makefile
new file mode 100644
index 0000000..33bb3f1
--- /dev/null
+++ b/share/doc/papers/px/Makefile
@@ -0,0 +1,15 @@
+# @(#)Makefile 5.3 (Berkeley) 6/8/93
+
+DIR= papers/px
+SRCS= pxin0.n pxin1.n pxin2.n pxin3.n pxin4.n
+EXTRA= fig1.1.n fig1.2.n fig1.3.n fig2.3.raw fig2.4.n fig3.2.n \
+ fig3.3.n table2.1.n table2.2.n table2.3.n table3.1.n tmac.p
+CLEANFILES+=fig2.3.n
+
+paper.ps: ${SRCS} fig2.3.n
+ ${SOELIM} ${SRCS} | ${TBL} | ${ROFF} > ${.TARGET}
+
+fig2.3.n: fig2.3.raw
+ sort fig2.3.raw >fig2.3.n
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/px/fig1.1.n b/share/doc/papers/px/fig1.1.n
new file mode 100644
index 0000000..290777e
--- /dev/null
+++ b/share/doc/papers/px/fig1.1.n
@@ -0,0 +1,71 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig1.1.n 5.2 (Berkeley) 4/17/91
+.\"
+.KF
+.TS
+center;
+c l
+l l
+_ l
+| l |
+| cw(18) | aw(28)
+| _ | l
+| c | a.
+Base of stack frame
+
+
+
+Block mark Positive offsets
+.sp
+ \(<- Display entry points here
+.sp
+Local
+variables
+.sp
+_ Negative offsets
+Temporary
+expression
+space
+.sp
+.T&
+| _ | l
+c l.
+
+.sp
+Top of stack frame
+.TE
+.sp
+.ce
+Figure 1.1 \- Structure of stack frame
+.sp
+.KE
diff --git a/share/doc/papers/px/fig1.2.n b/share/doc/papers/px/fig1.2.n
new file mode 100644
index 0000000..4f835b7
--- /dev/null
+++ b/share/doc/papers/px/fig1.2.n
@@ -0,0 +1,68 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig1.2.n 5.2 (Berkeley) 4/17/91
+.\"
+.KF
+.TS
+center;
+l l
+| cw(22n) | aw(20n).
+_ \&
+ Created by \s-2CALL\s0
+Saved lino
+.sp
+Saved lc
+.sp
+Saved dp
+.sp
+_ \&
+ Created by \s-2BEG\s0
+Saved dp contents
+.sp
+Pointer to current
+entry line and
+section name
+.sp
+Current file name
+and buffer
+.sp
+Top of stack reference
+.sp
+.T&
+| _ | l.
+
+.TE
+.sp
+.ce
+Figure 1.2 \- Block mark structure
+.sp
+.KE
diff --git a/share/doc/papers/px/fig1.3.n b/share/doc/papers/px/fig1.3.n
new file mode 100644
index 0000000..934296f
--- /dev/null
+++ b/share/doc/papers/px/fig1.3.n
@@ -0,0 +1,60 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig1.3.n 5.2 (Berkeley) 4/17/91
+.\"
+.TS
+center, allbox;
+lw(20).
+T{
+.nf
+.ce 1000
+Space for
+value returned
+from f
+.ce 0
+.fi
+T}
+T{
+.ce
+Value of a
+T}
+T{
+.sp
+.ce
+Block Mark
+.sp
+T}
+.TE
+.sp .1i
+.ce
+Figure 1.3 \- Stack structure on function call `f(a)'
+.sp .1i
diff --git a/share/doc/papers/px/fig2.3.raw b/share/doc/papers/px/fig2.3.raw
new file mode 100644
index 0000000..07feddf
--- /dev/null
+++ b/share/doc/papers/px/fig2.3.raw
@@ -0,0 +1,103 @@
+HALT 2.2 Produce control flow backtrace
+BEG s,W,w," 2.2,1.8 Write second part of block mark, enter block
+END 2.2,1.8 End block execution
+CALL l,A 2.2,1.8 Procedure or function call
+NODUMP s,W,w," 2.2 \s-2BEG\s0 main program, suppress dump
+PUSH s 2.2,1.9 Clear space (for function result)
+POP s 2.2,1.9 Pop (arguments) off stack
+LINO s 2.2 Set line number, count statements
+TRA a 2.2 Short control transfer (local branching)
+TRA4 A 2.2 Long control transfer
+GOTO l,A 2.2,1.8 Non-local goto statement
+IF a 2.3 Conditional transfer
+REL* r 2.3 Relational test yielding Boolean result
+AND 2.4 Boolean and
+OR 2.4 Boolean or
+NOT 2.4 Boolean not
+LRV* l,A 2.5 Right value (load) operators
+RV* l,a 2.5 Right value (load) operators
+CON* v 2.5 Load constant operators
+AS* 2.5 Assignment operators
+OFF s 2.5 Offset address, typically used for field reference
+INX* s,w,w 2.6 Subscripting (indexing) operator
+NIL 2.6 Assert non-nil pointer
+LLV l,W 2.6 Address of operator
+LV l,w 2.6 Address of operator
+IND* 2.6 Indirection operators
+ADD* 2.7 Addition
+SUB* 2.7 Subtraction
+MUL* 2.7 Multiplication
+SQR* 2.7 Squaring
+DIV* 2.7 Fixed division
+MOD* 2.7 Modulus
+ABS* 2.7 Absolute value
+NEG* 2.7 Negation
+DVD* 2.7 Floating division
+RANG* v 2.8 Subrange checking
+CASEOP* 2.9 Case statements
+FOR* a 2.12 For statements
+PXPBUF w 2.10 Initialize \fIpxp\fP count buffer
+TRACNT w,A 2.10 Count a procedure entry
+COUNT w 2.10 Count a statement count point
+CTTOT s,w,w 2.11 Construct set
+CARD s 2.11 Cardinality of set
+STOI 2.12 Convert short to long integer
+STOD 2.12 Convert short integer to real
+ITOD 2.12 Convert integer to real
+ITOS 2.12 Convert integer to short integer
+GET 3.7 Get next record from a file
+PUT 3.8 Output a record to a file
+MESSAGE 3.6 Write to terminal
+FNIL 3.7 Check file initialized, not eof, synced
+FLUSH 3.11 Flush a file
+BUFF 3.11 Specify buffering for file "output"
+EOF 3.10 Returns \fItrue\fR if end of file
+EOLN 3.10 Returns \fItrue\fR if end of line on input text file
+RESET 3.11 Open file for input
+REWRITE 3.11 Open file for output
+REMOVE 3.11 Remove a file
+UNIT* 3.10 Set active file
+READ* 3.7 Read a record from a file
+WRITEC 3.8 Character unformatted write
+WRITEF l 3.8 General formatted write
+WRITES l 3.8 String unformatted write
+WRITLN 3.8 Output a newline to a text file
+PAGE 3.8 Output a formfeed to a text file
+MIN s 3.8 Minimum of top of stack and \fIs\fR
+MAX s,w 3.8 Maximum of top of stack and \fIw\fR
+NAM A 3.8 Convert enumerated type value to print format
+FILE 3.9 Push descriptor for active file
+DEFNAME 3.11 Attach file name for \fBprogram\fR statement files
+PACK s,w,w,w 2.15 Convert and copy from unpacked to packed
+UNPACK s,w,w,w 2.15 Convert and copy from packed to unpacked
+LLIMIT 2.14 Set linelimit for output text file
+ARGC 2.14 Returns number of arguments to current process
+ARGV 2.14 Copy specified process argument into char array
+CLCK 2.14 Returns user time of program
+SCLCK 2.14 Returns system time of program
+WCLCK 2.14 Returns current time stamp
+DATE 2.14 Copy date into char array
+TIME 2.14 Copy time into char array
+SEED 2.13 Set random seed, return old seed
+RANDOM 2.13 Returns random number
+DISPOSE 2.15 Dispose of a heap allocation
+NEW s 2.15 Allocate a record on heap, set pointer to it
+EXPO 2.13 Returns machine representation of real exponent
+ATAN 2.13 Returns arctangent of argument
+EXP 2.13 Returns exponential of argument
+LN 2.13 Returns natural log of argument
+COS 2.13 Returns cos of argument
+SIN 2.13 Returns sin of argument
+SQRT 2.13 Returns square root of argument
+CHR* 2.15 Returns integer to ascii mapping of argument
+ODD* 2.15 Returns \fItrue\fR if argument is odd, \fIfalse\fR if even
+PRED* 2.7 Returns predecessor of argument
+STLIM 2.14 Set program statement limit
+SUCC* 2.7 Returns successor of argument
+ROUND 2.13 Returns \s-2TRUNC\s0(argument + 0.5)
+TRUNC 2.13 Returns integer part of argument
+UNDEF 2.15 Returns \fIfalse\fR
+SDUP 2.2 Duplicate top stack word
+ASRT 2.12 Assert \fItrue\fR to continue
+IN s,w,w 2.11 Set membership
+INCT 2.11 Membership in a constructed set
diff --git a/share/doc/papers/px/fig2.4.n b/share/doc/papers/px/fig2.4.n
new file mode 100644
index 0000000..d752a0d
--- /dev/null
+++ b/share/doc/papers/px/fig2.4.n
@@ -0,0 +1,57 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig2.4.n 5.2 (Berkeley) 4/17/91
+.\"
+.KF
+.TS
+center, box;
+cw(15).
+\s-2CASEOP\s0
+_
+No. of cases
+_
+.sp
+Case
+transfer
+table
+.sp
+_
+.sp
+Array of case
+label values
+.sp
+.TE
+.sp
+.ce
+Figure 2.4 \- Case data structure
+.sp
+.KE
diff --git a/share/doc/papers/px/fig3.2.n b/share/doc/papers/px/fig3.2.n
new file mode 100644
index 0000000..d8905a9
--- /dev/null
+++ b/share/doc/papers/px/fig3.2.n
@@ -0,0 +1,56 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig3.2.n 5.2 (Berkeley) 4/17/91
+.\"
+.KF
+.TS
+center, box;
+cw(15).
+No. of cases
+_
+.sp
+offsets
+of element
+names
+.sp
+_
+.sp
+Array of
+null terminated
+element names
+.sp
+.TE
+.sp
+.ce
+Figure 3.2 \- Enumerated type conversion structure
+.sp
+.KE
diff --git a/share/doc/papers/px/fig3.3.n b/share/doc/papers/px/fig3.3.n
new file mode 100644
index 0000000..bf42dab
--- /dev/null
+++ b/share/doc/papers/px/fig3.3.n
@@ -0,0 +1,57 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fig3.3.n 5.2 (Berkeley) 4/17/91
+.\"
+.KF
+.TS
+center;
+l l
+l | cw(15) |.
+ _
+\fIbool\fP: 2
+ _
+ 6
+ _
+ 12
+ _
+ 17
+ _
+ "false"
+ _
+ "true"
+ _
+.TE
+.sp
+.ce
+Figure 3.3 \- Boolean type conversion structure
+.sp
+.KE
diff --git a/share/doc/papers/px/pxin0.n b/share/doc/papers/px/pxin0.n
new file mode 100644
index 0000000..18edfc6
--- /dev/null
+++ b/share/doc/papers/px/pxin0.n
@@ -0,0 +1,140 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pxin0.n 5.2 (Berkeley) 4/17/91
+.\"
+.if !\n(xx .so tmac.p
+.RP
+.TL
+Berkeley Pascal
+PX Implementation Notes
+.br
+Version 2.0 \- January, 1979
+.AU
+William N. Joy\*(Dg
+.AU
+M. Kirk McKusick\*(Dd
+.AI
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+.PP
+Berkeley Pascal
+is designed for interactive instructional use and runs on the
+.SM "VAX 11/780" .
+The interpreter
+.I px
+executes the Pascal binaries generated by the Pascal translator
+.I pi .
+.PP
+The
+.I
+PX Implementation Notes
+.R
+describe the general organization of
+.I px ,
+detail the various operations of the interpreter,
+and describe the file input/output structure.
+Conclusions are given on the viability of an interpreter
+based approach to language implementation for an instructional environment.
+.AE
+.if n 'ND
+.SH
+Introduction
+.PP
+These
+.I
+PX Implementation Notes
+.R
+have been updated from the original
+.SM "PDP 11/70"
+implementation notes to reflect the interpreter that runs on the
+.SM "VAX 11/780" .
+These notes consist of four major parts.
+The first part outlines the general organization of
+.I px .
+Section 2 describes the operations (instructions) of the interpreter
+while section 3 focuses on input/output related activity.
+A final section gives conclusions about the viability of an interpreter
+based approach to language implementation for instruction.
+.SH
+Related Berkeley Pascal documents
+.PP
+The
+.I "PXP Implementation Notes"
+give details of the internals of the execution profiler
+.I pxp;
+parts of the interpreter related to
+.I pxp
+are discussed in section 2.10.
+A paper describing the syntactic error recovery mechanism used in
+.I pi
+was presented at the ACM Conference on Compiler Construction
+in Boulder, Colorado, in August 1979.
+.SH
+Acknowledgements
+.PP
+This version of
+.I px
+is a
+.SM "PDP 11/70"
+to
+.SM "VAX 11/780"
+opcode mapping of the original
+.I px
+that was designed and implemented by Ken Thompson,
+with extensive modifications and additions
+by William Joy
+and Charles Haley.
+Without their work, this
+.UP
+system would never have existed.
+These notes were first written by William Joy for the
+.SM "PDP 11/70"
+implementation.
+We would also like to thank our faculty advisor Susan L. Graham
+for her encouragement,
+her helpful comments and suggestions
+relating to
+.UP
+and her excellent editorial assistance.
+.FS
+\*(dg\ The financial support of the National Science Foundation under grants
+MCS74-07644-A03 and MCS78-07291
+and of an \s-2IBM\s0 Graduate Fellowship is gratefully acknowledged.
+.FE
+.FS
+\*(dd\ The financial support of a Howard Hughes Graduate
+Fellowship is gratefully acknowledged.
+.FE
+.bp
diff --git a/share/doc/papers/px/pxin1.n b/share/doc/papers/px/pxin1.n
new file mode 100644
index 0000000..9a2c256
--- /dev/null
+++ b/share/doc/papers/px/pxin1.n
@@ -0,0 +1,538 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pxin1.n 5.2 (Berkeley) 4/17/91
+.\"
+.if !\n(xx .so tmac.p
+.tr _\(ru
+.nr H1 0
+.NH
+Organization
+.PP
+Most of
+.I px
+is written in the
+.SM "VAX 11/780"
+assembly language, using the
+.UX
+assembler
+.I as.
+Portions of
+.I px
+are also written in the
+.UX
+systems programming language C.
+.I Px
+consists of a main procedure that reads in the interpreter code,
+a main interpreter loop that transfers successively to various
+code segments implementing the abstract machine operations,
+built-in procedures and functions,
+and several routines that support the implementation of the
+Pascal input-output environment.
+.PP
+The interpreter runs at a fraction of the speed of equivalent
+compiled C code, with this fraction varying from 1/5 to 1/15.
+The interpreter occupies 18.5K bytes of instruction space, shared among
+all processes executing Pascal, and has 4.6K bytes of data space (constants,
+error messages, etc.) a copy of which is allocated to each executing process.
+.NH 2
+Format of the object file
+.PP
+.I Px
+normally interprets the code left in an object file by a run of the
+Pascal translator
+.I pi.
+The file where the translator puts the object originally, and the most
+commonly interpreted file, is called
+.I obj.
+In order that all persons using
+.I px
+share a common text image, this executable file is
+a small process that coordinates with the interpreter to start
+execution.
+The interpreter code is placed
+at the end of a special ``header'' file and the size of the initialized
+data area of this header file is expanded to include this code,
+so that during execution it is located at an
+easily determined address in its data space.
+When executed, the object process creates a
+.I pipe ,
+creates another process by doing a
+.I fork ,
+and arranges that the resulting parent process becomes an instance of
+.I px .
+The child process then writes the interpreter code through
+the pipe that it has to the
+interpreter process parent.
+When this process is complete, the child exits.
+.PP
+The real advantage of this approach is that it does not require modifications
+to the shell, and that the resultant objects are ``true objects'' not
+requiring special treatment.
+A simpler mechanism would be to determine the name of the file that was
+executed and pass this to the interpreter.
+However it is not possible to determine this name
+in all cases.\*(Dd
+.FS
+\*(dd\ For instance, if the
+.I pxref
+program is placed in the directory
+`/usr/bin'
+then when the user types
+``pxref program.p''
+the first argument to the program, nominally the program's name, is
+``pxref.''
+While it would be possible to search in the standard places,
+i.e. the current directory, and the system directories
+`/bin'
+and
+`/usr/bin'
+for a corresponding object file,
+this would be expensive and not guaranteed to succeed.
+Several shells exist that allow other directories to be searched
+for commands, and there is,
+in general,
+no way to determine what these directories are.
+.FE
+.NH 2
+General features of object code
+.PP
+Pascal object code is relocatable as all addressing references for
+control transfers within the code are relative.
+The code consists of instructions interspersed with inline data.
+All instructions have a length that is an even number of bytes.
+No variables are kept in the object code area.
+.PP
+The first byte of a Pascal interpreter instruction contains an operation
+code.
+This allows a total of 256 major operation codes, and 232 of these are
+in use in the current
+.I px.
+The second byte of each interpreter instruction is called the
+``sub-operation code'',
+or more commonly the
+.I sub-opcode.
+It contains a small integer that may, for example, be used as a
+block-structure level for the associated operation.
+If the instruction can take a longword constant,
+this constant is often packed into the sub-opcode
+if it fits into 8 bits and is not zero.
+A sub-opcode value of zero specifies that the constant would not
+fit and therefore follows in the next word.
+This is a space optimization, the value of zero for flagging
+the longer case being convenient because it is easy to test.
+.PP
+Other instruction formats are used.
+The branching
+instructions take an offset in the following word,
+operators that load constants onto the stack
+take arbitrarily long inline constant values,
+and many operations deal exclusively with data on the
+interpreter stack, requiring no inline data.
+.NH 2
+Stack structure of the interpreter
+.PP
+The interpreter emulates a stack-structured Pascal machine.
+The ``load'' instructions put values onto the stack, where all
+arithmetic operations take place.
+The ``store'' instructions take values off the stack
+and place them in an address that is also contained on the stack.
+The only way to move data or to compute in the machine is with the stack.
+.PP
+To make the interpreter operations more powerful
+and to thereby increase the interpreter speed,
+the arithmetic operations in the interpreter are ``typed''.
+That is, length conversion of arithmetic values occurs when they are
+used in an operation.
+This eliminates interpreter cycles for length conversion
+and the associated overhead.
+For example, when adding an integer that fits in one byte to one that
+requires four bytes to store, no ``conversion'' operators are required.
+The one byte integer is loaded onto the stack, followed by the four
+byte integer, and then an adding operator is used that has, implicit
+in its definition, the sizes of the arguments.
+.NH 2
+Data types in the interpreter
+.PP
+The interpreter deals with several different fundamental data types.
+In the memory of the machine, 1, 2, and 4 byte integers are supported,
+with only 2 and 4 byte integers being present on the stack.
+The interpreter always converts to 4 byte integers when there is a possibility
+of overflowing the shorter formats.
+This corresponds to the Pascal language definition of overflow in
+arithmetic operations that requires that the result be correct
+if all partial values lie within the bounds of the base integer type:
+4 byte integer values.
+.PP
+Character constants are treated similarly to 1 byte integers for
+most purposes, as are Boolean values.
+All enumerated types are treated as integer values of
+an appropriate length, usually 1 byte.
+The interpreter also has real numbers, occupying 8 bytes of storage,
+and sets and strings of varying length.
+The appropriate operations are included for each data type, such as
+set union and intersection and an operation to write a string.
+.PP
+No special
+.B packed
+data formats are supported by the interpreter.
+The smallest unit of storage occupied by any variable is one byte.
+The built-ins
+.I pack
+and
+.I unpack
+thus degenerate to simple memory to memory transfers with
+no special processing.
+.NH 2
+Runtime environment
+.PP
+The interpreter runtime environment uses a stack data area and a heap
+data area, that are kept at opposite ends of memory
+and grow towards each other.
+All global variables and variables local to procedures and functions
+are kept in the stack area.
+Dynamically allocated variables and buffers for input/output are
+allocated in the heap.
+.PP
+The addressing of block structured variables is done by using
+a fixed display
+that contains the address of its stack frame
+for each statically active block.\*(Dg
+.FS
+\*(dg\ Here ``block'' is being used to mean any
+.I procedure ,
+.I function
+or the main program.
+.FE
+This display is referenced by instructions that load and store
+variables and maintained by the operations for
+block entry and exit, and for non-local
+.B goto
+statements.
+.NH 2
+Dp, lc, loop
+.PP
+Three ``global'' variables in the interpreter, in addition to the
+``display'', are the
+.I dp,
+.I lc,
+and the
+.I loop.
+The
+.I dp
+is a pointer to the display entry for the current block;
+the
+.I lc
+is the abstract machine location counter;
+and the
+.I loop
+is a register that holds the address of the main interpreter
+loop so that returning to the loop to fetch the next instruction is
+a fast operation.
+.NH 2
+The stack frame structure
+.PP
+Each active block
+has a stack frame consisting of three parts:
+a block mark, local variables, and temporary storage for partially
+evaluated expressions.
+The stack in the interpreter grows from the high addresses in memory
+to the low addresses,
+so that those parts of the stack frame that are ``on the top''
+of the stack have the most negative offsets from the display
+entry for the block.
+The major parts of the stack frame are represented in Figure 1.1.
+.so fig1.1.n
+Note that the local variables of each block
+have negative offsets from the corresponding display entry,
+the ``first'' local variable having offset `\-2'.
+.NH 2
+The block mark
+.PP
+The block mark contains the saved information necessary
+to restore the environment when the current block exits.
+It consists of two parts.
+The first and top-most part is saved by the
+.SM CALL
+instruction in the interpreter.
+This information is not present for the main program
+as it is never ``called''.
+The second part of the block mark is created by the
+.SM BEG
+begin block operator that also allocates and clears the
+local variable storage.
+The format of these blocks is represented in Figure 1.2.
+.sp
+.so fig1.2.n
+.PP
+The data saved by the
+.SM CALL
+operator includes the line number
+.I lino
+of the point of call,
+that is printed if the program execution ends abnormally;
+the location counter
+.I lc
+giving the return address;
+and the current display entry address
+.I dp
+at the time of call.
+.PP
+The
+.SM BEG
+begin operator saves the previous display contents at the level
+of this block, so that the display can be restored on block exit.
+A pointer to the beginning line number and the
+name of this block is also saved.
+This information is stored in the interpreter object code in-line after the
+.SM BEG
+operator.
+It is used in printing a post-mortem backtrace.
+The saved file name and buffer reference are necessary because of
+the input/output structure
+(this is discussed in detail in
+sections 3.3 and 3.4).
+The top of stack reference gives the value the stack pointer should
+have when there are no expression temporaries on the stack.
+It is used for a consistency check in the
+.SM LINO
+line number operators in the interpreter, that occurs before
+each statement executed.
+This helps to catch bugs in the interpreter, that often manifest
+themselves by leaving the stack non-empty between statements.
+.PP
+Note that there is no explicit static link here.
+Thus to set up the display correctly after a non-local
+.B goto
+statement one must ``unwind''
+through all the block marks on the stack to rebuild the display.
+.NH 2
+Arguments and return values
+.PP
+A function returns its value into a space reserved by the calling
+block.
+Arguments to a
+.B function
+are placed on top of this return area.
+For both
+.B procedure
+and
+.B function
+calls, arguments are placed at the end of the expression evaluation area
+of the caller.
+When a
+.B function
+completes, expression evaluation can continue
+after popping the arguments to the
+.B function
+off the stack,
+exactly as if the function value had been ``loaded''.
+The arguments to a
+.B procedure
+are also popped off the stack by the caller
+after its execution ends.
+.KS
+.PP
+As a simple example consider the following stack structure
+for a call to a function
+.I f,
+of the form ``f(a)''.
+.so fig1.3.n
+.KE
+.PP
+If we suppose that
+.I f
+returns a
+.I real
+and that
+.I a
+is an integer,
+the calling sequence for this function would be:
+.DS
+.TS
+lp-2w(8) l.
+PUSH \-8
+RV4:\fIl a\fR
+CALL:\fIl f\fR
+POP 4
+.TE
+.DE
+.ZP
+Here we use the operator
+.SM PUSH
+to clear space for the return value,
+load
+.I a
+on the stack with a ``right value'' operator,
+call the function,
+pop off the argument
+.I a ,
+and can then complete evaluation of the containing expression.
+The operations used here will be explained in section 2.
+.PP
+If the function
+.I f
+were given by
+.LS
+ 10 \*bfunction\fR f(i: integer): real;
+ 11 \*bbegin\fR
+ 12 f := i
+ 13 \*bend\fR;
+.LE
+then
+.I f
+would have code sequence:
+.DS
+.TS
+lp-2w(8) l.
+BEG:2 0
+ 11
+ "f"
+LV:\fIl\fR 40
+RV4:\fIl\fR 32
+AS48
+END
+.TE
+.DE
+.ZP
+Here the
+.SM BEG
+operator takes 9 bytes of inline data.
+The first byte specifies the
+length of the function name.
+The second longword specifies the
+amount of local variable storage, here none.
+The succeeding two lines give the line number of the
+.B begin
+and the name of the block
+for error traceback.
+The
+.SM BEG
+operator places a name pointer in the block mark.
+The body of the
+.B function
+first takes an address of the
+.B function
+result variable
+.I f
+using the address of operator
+.SM LV
+.I a .
+The next operation in the interpretation of this function is the loading
+of the value of
+.I i .
+.I I
+is at the level of the
+.B function
+.I f ,
+here symbolically
+.I l,
+and the first variable in the local variable area.
+The
+.B function
+completes by assigning the 4 byte integer on the stack to the 8 byte
+return location, hence the
+.SM AS48
+assignment operator, and then uses the
+.SM END
+operator to exit the current block.
+.NH 2
+The main interpreter loop
+.PP
+The main interpreter loop is simply:
+.DS
+.mD
+iloop:
+ \fBcaseb\fR (lc)+,$0,$255
+ <table of opcode interpreter addresses>
+.DE
+.ZP
+The main opcode is extracted from the first byte of the instruction
+and used to index into the table of opcode interpreter addresses.
+Control is then transferred to the specified location.
+The sub-opcode may be used to index the display,
+as a small constant,
+or to specify one of several relational operators.
+In the cases where a constant is needed, but it
+is not small enough to fit in the one-byte sub-opcode,
+a zero is placed there and the constant follows in the next word.
+Zero is easily tested for,
+as the instruction that fetches the
+sub-opcode sets the condition code flags.
+A construction like:
+.DS
+.mD
+_OPER:
+ \fBcvtbl\fR (lc)+,r0
+ \fBbneq\fR L1
+ \fBcvtwl\fR (lc)+,r0
+L1: ...
+.DE
+is all that is needed to effect this packing of data.
+This technique saves space in the Pascal
+.I obj
+object code.
+.PP
+The address of the instruction at
+.I iloop
+is always contained in the register variable
+.I loop .
+Thus a return to the main interpreter is simply:
+.DS
+ \fBjmp\fR (loop)
+.DE
+that is both quick and occupies little space.
+.NH 2
+Errors
+.PP
+Errors during interpretation fall into three classes:
+.DS
+1) Interpreter detected errors.
+2) Hardware detected errors.
+3) External events.
+.DE
+.PP
+Interpreter detected errors include I/O errors and
+built-in function errors.
+These errors cause a subroutine call to an error routine
+with a single parameter indicating the cause of the error.
+Hardware errors such as range errors and overflows are
+fielded by a special routine that determines the opcode
+that caused the error.
+It then calls the error routine with an appropriate error
+parameter.
+External events include interrupts and system limits such
+as available memory.
+They generate a call to the error routine with an
+appropriate error code.
+The error routine processes the error condition,
+printing an appropriate error message and usually
+a backtrace from the point of the error.
diff --git a/share/doc/papers/px/pxin2.n b/share/doc/papers/px/pxin2.n
new file mode 100644
index 0000000..0a12b90
--- /dev/null
+++ b/share/doc/papers/px/pxin2.n
@@ -0,0 +1,923 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pxin2.n 5.2 (Berkeley) 4/17/91
+.\"
+.if !\n(xx .so tmac.p
+.nr H1 1
+.if n .ND
+.NH
+Operations
+.NH 2
+Naming conventions and operation summary
+.PP
+Table 2.1 outlines the opcode typing convention.
+The expression ``a above b'' means that `a' is on top
+of the stack with `b' below it.
+Table 2.3 describes each of the opcodes.
+The character `*' at the end of a name specifies that
+all operations with the root prefix
+before the `*'
+are summarized by one entry.
+Table 2.2 gives the codes used
+to describe the type of inline data expected by each instruction.
+.sp 2
+.so table2.1.n
+.sp 2
+.so table2.2.n
+.bp
+.so table2.3.n
+.bp
+.NH 2
+Basic control operations
+.LP
+.SH
+HALT
+.IP
+Corresponds to the Pascal procedure
+.I halt ;
+causes execution to end with a post-mortem backtrace as if a run-time
+error had occurred.
+.SH
+BEG s,W,w,"
+.IP
+Causes the second part of the block mark to be created, and
+.I W
+bytes of local variable space to be allocated and cleared to zero.
+Stack overflow is detected here.
+.I w
+is the first line of the body of this section for error traceback,
+and the inline string (length s) the character representation of its name.
+.SH
+NODUMP s,W,w,"
+.IP
+Equivalent to
+.SM BEG ,
+and used to begin the main program when the ``p''
+option is disabled so that the post-mortem backtrace will be inhibited.
+.SH
+END
+.IP
+Complementary to the operators
+.SM CALL
+and
+.SM BEG ,
+exits the current block, calling the procedure
+.I pclose
+to flush buffers for and release any local files.
+Restores the environment of the caller from the block mark.
+If this is the end for the main program, all files are
+.I flushed,
+and the interpreter is exited.
+.SH
+CALL l,A
+.IP
+Saves the current line number, return address, and active display entry pointer
+.I dp
+in the first part of the block mark, then transfers to the entry point
+given by the relative address
+.I A ,
+that is the beginning of a
+.B procedure
+or
+.B function
+at level
+.I l.
+.SH
+PUSH s
+.IP
+Clears
+.I s
+bytes on the stack.
+Used to make space for the return value of a
+.B function
+just before calling it.
+.SH
+POP s
+.IP
+Pop
+.I s
+bytes off the stack.
+Used after a
+.B function
+or
+.B procedure
+returns to remove the arguments from the stack.
+.SH
+TRA a
+.IP
+Transfer control to relative address
+.I a
+as a local
+.B goto
+or part of a structured statement.
+.SH
+TRA4 A
+.IP
+Transfer control to an absolute address as part of a non-local
+.B goto
+or to branch over procedure bodies.
+.SH
+LINO s
+.IP
+Set current line number to
+.I s.
+For consistency, check that the expression stack is empty
+as it should be (as this is the start of a statement).
+This consistency check will fail only if there is a bug in the
+interpreter or the interpreter code has somehow been damaged.
+Increment the statement count and if it exceeds the statement limit,
+generate a fault.
+.SH
+GOTO l,A
+.IP
+Transfer control to address
+.I A
+that is in the block at level
+.I l
+of the display.
+This is a non-local
+.B goto.
+Causes each block to be exited as if with
+.SM END ,
+flushing and freeing files with
+.I pclose,
+until the current display entry is at level
+.I l.
+.SH
+SDUP*
+.IP
+Duplicate the word or long on the top of
+the stack.
+This is used mostly for constructing sets.
+See section 2.11.
+.NH 2
+If and relational operators
+.SH
+IF a
+.IP
+The interpreter conditional transfers all take place using this operator
+that examines the Boolean value on the top of the stack.
+If the value is
+.I true ,
+the next code is executed,
+otherwise control transfers to the specified address.
+.SH
+REL* r
+.IP
+These take two arguments on the stack,
+and the sub-operation code specifies the relational operation to
+be done, coded as follows with `a' above `b' on the stack:
+.DS
+.mD
+.TS
+lb lb
+c a.
+Code Operation
+_
+0 a = b
+2 a <> b
+4 a < b
+6 a > b
+8 a <= b
+10 a >= b
+.TE
+.DE
+.IP
+Each operation does a test to set the condition code
+appropriately and then does an indexed branch based on the
+sub-operation code to a test of the condition here specified,
+pushing a Boolean value on the stack.
+.IP
+Consider the statement fragment:
+.DS
+.mD
+\*bif\fR a = b \*bthen\fR
+.DE
+.IP
+If
+.I a
+and
+.I b
+are integers this generates the following code:
+.DS
+.TS
+lp-2w(8) l.
+RV4:\fIl a\fR
+RV4:\fIl b\fR
+REL4 \&=
+IF \fIElse part offset\fR
+.sp
+.T&
+c s.
+\fI\&... Then part code ...\fR
+.TE
+.DE
+.NH 2
+Boolean operators
+.PP
+The Boolean operators
+.SM AND ,
+.SM OR ,
+and
+.SM NOT
+manipulate values on the top of the stack.
+All Boolean values are kept in single bytes in memory,
+or in single words on the stack.
+Zero represents a Boolean \fIfalse\fP, and one a Boolean \fItrue\fP.
+.NH 2
+Right value, constant, and assignment operators
+.SH
+LRV* l,A
+.br
+RV* l,a
+.IP
+The right value operators load values on the stack.
+They take a block number as a sub-opcode and load the appropriate
+number of bytes from that block at the offset specified
+in the following word onto the stack. As an example, consider
+.SM LRV4 :
+.DS
+.mD
+_LRV4:
+ \fBcvtbl\fR (lc)+,r0 #r0 has display index
+ \fBaddl3\fR _display(r0),(lc)+,r1 #r1 has variable address
+ \fBpushl\fR (r1) #put value on the stack
+ \fBjmp\fR (loop)
+.DE
+.IP
+Here the interpreter places the display level in r0.
+It then adds the appropriate display value to the inline offset and
+pushes the value at this location onto the stack.
+Control then returns to the main
+interpreter loop.
+The
+.SM RV*
+operators have short inline data that
+reduces the space required to address the first 32K of
+stack space in each stack frame.
+The operators
+.SM RV14
+and
+.SM RV24
+provide explicit conversion to long as the data
+is pushed.
+This saves the generation of
+.SM STOI
+to align arguments to
+.SM C
+subroutines.
+.SH
+CON* r
+.IP
+The constant operators load a value onto the stack from inline code.
+Small integer values are condensed and loaded by the
+.SM CON1
+operator, that is given by
+.DS
+.mD
+_CON1:
+ \fBcvtbw\fR (lc)+,\-(sp)
+ \fBjmp\fR (loop)
+.DE
+.IP
+Here note that little work was required as the required constant
+was available at (lc)+.
+For longer constants,
+.I lc
+must be incremented before moving the constant.
+The operator
+.SM CON
+takes a length specification in the sub-opcode and can be used to load
+strings and other variable length data onto the stack.
+The operators
+.SM CON14
+and
+.SM CON24
+provide explicit conversion to long as the constant is pushed.
+.SH
+AS*
+.IP
+The assignment operators are similar to arithmetic and relational operators
+in that they take two operands, both in the stack,
+but the lengths given for them specify
+first the length of the value on the stack and then the length
+of the target in memory.
+The target address in memory is under the value to be stored.
+Thus the statement
+.DS
+i := 1
+.DE
+.IP
+where
+.I i
+is a full-length, 4 byte, integer,
+will generate the code sequence
+.DS
+.TS
+lp-2w(8) l.
+LV:\fIl i\fP
+CON1:1
+AS24
+.TE
+.DE
+.IP
+Here
+.SM LV
+will load the address of
+.I i,
+that is really given as a block number in the sub-opcode and an
+offset in the following word,
+onto the stack, occupying a single word.
+.SM CON1 ,
+that is a single word instruction,
+then loads the constant 1,
+that is in its sub-opcode,
+onto the stack.
+Since there are no one byte constants on the stack,
+this becomes a 2 byte, single word integer.
+The interpreter then assigns a length 2 integer to a length 4 integer using
+.SM AS24 \&.
+The code sequence for
+.SM AS24
+is given by:
+.DS
+.mD
+_AS24:
+ \fBincl\fR lc
+ \fBcvtwl\fR (sp)+,*(sp)+
+ \fBjmp\fR (loop)
+.DE
+.IP
+Thus the interpreter gets the single word off the stack,
+extends it to be a 4 byte integer,
+gets the target address off the stack,
+and finally stores the value in the target.
+This is a typical use of the constant and assignment operators.
+.NH 2
+Addressing operations
+.SH
+LLV l,W
+.br
+LV l,w
+.IP
+The most common operation done by the interpreter
+is the ``left value'' or ``address of'' operation.
+It is given by:
+.DS
+.mD
+_LLV:
+ \fBcvtbl\fR (lc)+,r0 #r0 has display index
+ \fBaddl3\fR _display(r0),(lc)+,\-(sp) #push address onto the stack
+ \fBjmp\fR (loop)
+.DE
+.IP
+It calculates an address in the block specified in the sub-opcode
+by adding the associated display entry to the
+offset that appears in the following word.
+The
+.SM LV
+operator has a short inline data that reduces the space
+required to address the first 32K of stack space in each call frame.
+.SH
+OFF s
+.IP
+The offset operator is used in field names.
+Thus to get the address of
+.LS
+p^.f1
+.LE
+.IP
+.I pi
+would generate the sequence
+.DS
+.mD
+.TS
+lp-2w(8) l.
+RV:\fIl p\fP
+OFF \fIf1\fP
+.TE
+.DE
+.IP
+where the
+.SM RV
+loads the value of
+.I p,
+given its block in the sub-opcode and offset in the following word,
+and the interpreter then adds the offset of the field
+.I f1
+in its record to get the correct address.
+.SM OFF
+takes its argument in the sub-opcode if it is small enough.
+.SH
+NIL
+.IP
+The example above is incomplete, lacking a check for a
+.B nil
+pointer.
+The code generated would be
+.DS
+.TS
+lp-2w(8) l.
+RV:\fIl p\fP
+NIL
+OFF \fIf1\fP
+.TE
+.DE
+.IP
+where the
+.SM NIL
+operation checks for a
+.I nil
+pointer and generates the appropriate runtime error if one is found.
+.SH
+LVCON s,"
+.IP
+A pointer to the specified length inline data is pushed
+onto the stack.
+This is primarily used for
+.I printf
+type strings used by
+.SM WRITEF .
+(See sections 3.6 and 3.8.)
+.SH
+INX* s,w,w
+.IP
+The operators
+.SM INX2
+and
+.SM INX4
+are used for subscripting.
+For example, the statement
+.DS
+a[i] := 2.0
+.DE
+.IP
+with
+.I i
+an integer and
+.I a
+an
+``array [1..1000] of real''
+would generate
+.DS
+.TS
+lp-2w(8) l.
+LV:\fIl a\fP
+RV4:\fIl i\fP
+INX4:8 1,999
+CON8 2.0
+AS8
+.TE
+.DE
+.IP
+Here the
+.SM LV
+operation takes the address of
+.I a
+and places it on the stack.
+The value of
+.I i
+is then placed on top of this on the stack.
+The array address is indexed by the
+length 4 index (a length 2 index would use
+.SM INX2 )
+where the individual elements have a size of 8 bytes.
+The code for
+.SM INX4
+is:
+.DS
+.mD
+_INX4:
+ \fBcvtbl\fR (lc)+,r0
+ \fBbneq\fR L1
+ \fBcvtwl\fR (lc)+,r0 #r0 has size of records
+L1:
+ \fBcvtwl\fR (lc)+,r1 #r1 has lower bound
+ \fBmovzwl\fR (lc)+,r2 #r2 has upper-lower bound
+ \fBsubl3\fR r1,(sp)+,r3 #r3 has base subscript
+ \fBcmpl\fR r3,r2 #check for out of bounds
+ \fBbgtru\fR esubscr
+ \fBmull2\fR r0,r3 #calculate byte offset
+ \fBaddl2\fR r3,(sp) #calculate actual address
+ \fBjmp\fR (loop)
+esubscr:
+ \fBmovw\fR $ESUBSCR,_perrno
+ \fBjbr\fR error
+.DE
+.IP
+Here the lower bound is subtracted, and range checked against the
+upper minus lower bound.
+The offset is then scaled to a byte offset into the array
+and added to the base address on the stack.
+Multi-dimension subscripts are translated as a sequence of single subscriptings.
+.SH
+IND*
+.IP
+For indirect references through
+.B var
+parameters and pointers,
+the interpreter has a set of indirection operators that convert a pointer
+on the stack into a value on the stack from that address.
+Different
+.SM IND
+operators are necessary because of the possibility of different
+length operands.
+The
+.SM IND14
+and
+.SM IND24
+operators do conversions to long
+as they push their data.
+.NH 2
+Arithmetic operators
+.PP
+The interpreter has many arithmetic operators.
+All operators produce results long enough to prevent overflow
+unless the bounds of the base type are exceeded.
+The basic operators available are
+.DS
+Addition: ADD*, SUCC*
+Subtraction: SUB*, PRED*
+Multiplication: MUL*, SQR*
+Division: DIV*, DVD*, MOD*
+Unary: NEG*, ABS*
+.DE
+.NH 2
+Range checking
+.PP
+The interpreter has several range checking operators.
+The important distinction among these operators is between values whose
+legal range begins at zero and those that do not begin at zero,
+for example
+a subrange variable whose values range from 45 to 70.
+For those that begin at zero, a simpler ``logical'' comparison against
+the upper bound suffices.
+For others, both the low and upper bounds must be checked independently,
+requiring two comparisons.
+On the
+.SM "VAX 11/780"
+both checks are done using a single index instruction
+so the only gain is in reducing the inline data.
+.NH 2
+Case operators
+.PP
+The interpreter includes three operators for
+.B case
+statements that are used depending on the width of the
+.B case
+label type.
+For each width, the structure of the case data is the same, and
+is represented in figure 2.4.
+.sp 1
+.so fig2.4.n
+.PP
+The
+.SM CASEOP
+case statement operators do a sequential search through the
+case label values.
+If they find the label value, they take the corresponding entry
+from the transfer table and cause the interpreter to branch to the
+specified statement.
+If the specified label is not found, an error results.
+.PP
+The
+.SM CASE
+operators take the number of cases as a sub-opcode
+if possible.
+Three different operators are needed to handle single byte,
+word, and long case transfer table values.
+For example, the
+.SM CASEOP1
+operator has the following code sequence:
+.DS
+.mD
+_CASEOP1:
+ \fBcvtbl\fR (lc)+,r0
+ \fBbneq\fR L1
+ \fBcvtwl\fR (lc)+,r0 #r0 has length of case table
+L1:
+ \fBmovaw\fR (lc)[r0],r2 #r2 has pointer to case labels
+ \fBmovzwl\fR (sp)+,r3 #r3 has the element to find
+ \fBlocc\fR r3,r0,(r2) #r0 has index of located element
+ \fBbeql\fR caserr #element not found
+ \fBmnegl\fR r0,r0 #calculate new lc
+ \fBcvtwl\fR (r2)[r0],r1 #r1 has lc offset
+ \fBaddl2\fR r1,lc
+ \fBjmp\fR (loop)
+caserr:
+ \fBmovw\fR $ECASE,_perrno
+ \fBjbr\fR error
+.DE
+.PP
+Here the interpreter first computes the address of the beginning
+of the case label value area by adding twice the number of case label
+values to the address of the transfer table, since the transfer
+table entries are 2 byte address offsets.
+It then searches through the label values, and generates an ECASE
+error if the label is not found.
+If the label is found, the index of the corresponding entry
+in the transfer table is extracted and that offset is added
+to the interpreter location counter.
+.NH 2
+Operations supporting pxp
+.PP
+The following operations are defined to do execution profiling.
+.SH
+PXPBUF w
+.IP
+Causes the interpreter to allocate a count buffer
+with
+.I w
+four byte counters
+and to clear them to zero.
+The count buffer is placed within an image of the
+.I pmon.out
+file as described in the
+.I "PXP Implementation Notes."
+The contents of this buffer are written to the file
+.I pmon.out
+when the program ends.
+.SH
+COUNT w
+.IP
+Increments the counter specified by
+.I w .
+.SH
+TRACNT w,A
+.IP
+Used at the entry point to procedures and functions,
+combining a transfer to the entry point of the block with
+an incrementing of its entry count.
+.NH 2
+Set operations
+.PP
+The set operations:
+union
+.SM ADDT,
+intersection
+.SM MULT,
+element removal
+.SM SUBT,
+and the set relationals
+.SM RELT
+are straightforward.
+The following operations are more interesting.
+.SH
+CARD s
+.IP
+Takes the cardinality of a set of size
+.I s
+bytes on top of the stack, leaving a 2 byte integer count.
+.SM CARD
+uses the
+.B ffs
+opcode to successively count the number of set bits in the set.
+.SH
+CTTOT s,w,w
+.IP
+Constructs a set.
+This operation requires a non-trivial amount of work,
+checking bounds and setting individual bits or ranges of bits.
+This operation sequence is slow,
+and motivates the presence of the operator
+.SM INCT
+below.
+The arguments to
+.SM CTTOT
+include the number of elements
+.I s
+in the constructed set,
+the lower and upper bounds of the set,
+the two
+.I w
+values,
+and a pair of values on the stack for each range in the set, single
+elements in constructed sets being duplicated with
+.SM SDUP
+to form degenerate ranges.
+.SH
+IN s,w,w
+.IP
+The operator
+.B in
+for sets.
+The value
+.I s
+specifies the size of the set,
+the two
+.I w
+values the lower and upper bounds of the set.
+The value on the stack is checked to be in the set on the stack,
+and a Boolean value of
+.I true
+or
+.I false
+replaces the operands.
+.SH
+INCT
+.IP
+The operator
+.B in
+on a constructed set without constructing it.
+The left operand of
+.B in
+is on top of the stack followed by the number of pairs in the
+constructed set,
+and then the pairs themselves, all as single word integers.
+Pairs designate runs of values and single values are represented by
+a degenerate pair with both values equal.
+This operator is generated in grammatical constructs such as
+.LS
+\fBif\fR character \fBin\fR [`+', `\-', `*', `/']
+.LE
+.IP
+or
+.LS
+\fBif\fR character \fBin\fR [`a'..`z', `$', `_']
+.LE
+.IP
+These constructs are common in Pascal, and
+.SM INCT
+makes them run much faster in the interpreter,
+as if they were written as an efficient series of
+.B if
+statements.
+.NH 2
+Miscellaneous
+.PP
+Other miscellaneous operators that are present in the interpreter
+are
+.SM ASRT
+that causes the program to end if the Boolean value on the stack is not
+.I true,
+and
+.SM STOI ,
+.SM STOD ,
+.SM ITOD ,
+and
+.SM ITOS
+that convert between different length arithmetic operands for
+use in aligning the arguments in
+.B procedure
+and
+.B function
+calls, and with some untyped built-ins, such as
+.SM SIN
+and
+.SM COS \&.
+.PP
+Finally, if the program is run with the run-time testing disabled, there
+are special operators for
+.B for
+statements
+and special indexing operators for arrays
+that have individual element size that is a power of 2.
+The code can run significantly faster using these operators.
+.NH 2
+Mathematical Functions
+.PP
+The transcendental functions
+.SM SIN ,
+.SM COS ,
+.SM ATAN ,
+.SM EXP ,
+.SM LN ,
+.SM SQRT ,
+.SM SEED ,
+and
+.SM RANDOM
+are taken from the standard UNIX
+mathematical package.
+These functions take double precision floating point
+values and return the same.
+.PP
+The functions
+.SM EXPO ,
+.SM TRUNC ,
+and
+.SM ROUND
+take a double precision floating point number.
+.SM EXPO
+returns an integer representing the machine
+representation of its argument's exponent,
+.SM TRUNC
+returns the integer part of its argument, and
+.SM ROUND
+returns the rounded integer part of its argument.
+.NH 2
+System functions and procedures
+.SH
+LLIMIT
+.IP
+A line limit and a file pointer are passed on the stack.
+If the limit is non-negative the line limit is set to the
+specified value, otherwise it is set to unlimited.
+The default is unlimited.
+.SH
+STLIM
+.IP
+A statement limit is passed on the stack. The statement limit
+is set as specified.
+The default is 500,000.
+No limit is enforced when the ``p'' option is disabled.
+.SH
+CLCK
+.br
+SCLCK
+.IP
+.SM CLCK
+returns the number of milliseconds of user time used by the program;
+.SM SCLCK
+returns the number of milliseconds of system time used by the program.
+.SH
+WCLCK
+.IP
+The number of seconds since some predefined time is
+returned. Its primary usefulness is in determining
+elapsed time and in providing a unique time stamp.
+.sp
+.LP
+The other system time procedures are
+.SM DATE
+and
+.SM TIME
+that copy an appropriate text string into a Pascal string array.
+The function
+.SM ARGC
+returns the number of command line arguments passed to the program.
+The procedure
+.SM ARGV
+takes an index on the stack and copies the specified
+command line argument into a Pascal string array.
+.NH 2
+Pascal procedures and functions
+.SH
+PACK s,w,w,w
+.br
+UNPACK s,w,w,w
+.IP
+They function as a memory to memory move with several
+semantic checks.
+They do no ``unpacking'' or ``packing'' in the true sense as the
+interpreter supports no packed data types.
+.SH
+NEW s
+.br
+DISPOSE s
+.IP
+An
+.SM LV
+of a pointer is passed.
+.SM NEW
+allocates a record of a specified size and puts a pointer
+to it into the pointer variable.
+.SM DISPOSE
+deallocates the record pointed to by the pointer
+and sets the pointer to
+.SM NIL .
+.sp
+.LP
+The function
+.SM CHR*
+converts a suitably small integer into an ASCII character.
+Its primary purpose is to do a range check.
+The function
+.SM ODD*
+returns
+.I true
+if its argument is odd and returns
+.I false
+if its argument is even.
+The function
+.SM UNDEF
+always returns the value
+.I false .
diff --git a/share/doc/papers/px/pxin3.n b/share/doc/papers/px/pxin3.n
new file mode 100644
index 0000000..9194460
--- /dev/null
+++ b/share/doc/papers/px/pxin3.n
@@ -0,0 +1,597 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pxin3.n 5.2 (Berkeley) 4/17/91
+.\"
+.if !\n(xx .so tmac.p
+.ta 8n 16n 24n
+.nr H1 2
+.if n .ND
+.NH
+Input/output
+.NH 2
+The files structure
+.PP
+Each file in the Pascal environment is represented by a pointer
+to a
+.I files
+structure in the heap.
+At the location addressed by the pointer is the element
+in the file's window variable.
+Behind this window variable is information about the file,
+at the following offsets:
+.so table3.1.n
+.PP
+Here
+.SM FBUF
+is a pointer to the system FILE block for the file.
+The standard system I/O library is
+used that provides block buffered input/output,
+with 1024 characters normally transferred at each read or write.
+.PP
+The files in the
+Pascal environment
+are all linked together on a single file chain through the
+.SM FCHAIN
+links.
+For each file the
+.SM FLEV
+pointer gives its associated file variable.
+These are used to free files at block exit as described in section 3.3
+below.
+.PP
+The
+FNAME
+and
+PFNAME
+give the associated
+file name for the file and the name to be used when printing
+error diagnostics respectively.
+Although these names are usually the same,
+.I input
+and
+.I output
+usually have no associated
+file name so the distinction is necessary.
+.PP
+The
+FUNIT
+word contains
+a set of flags
+whose representations are:
+.TS
+center;
+l l l.
+EOF 0x0100 At end-of-file
+EOLN 0x0200 At end-of-line (text files only)
+SYNC 0x0400 File window is out of sync
+TEMP 0x0800 File is temporary
+FREAD 0x1000 File is open for reading
+FWRITE 0x2000 File is open for writing
+FTEXT 0x4000 File is a text file; process EOLN
+FDEF 0x8000 File structure created, but file not opened
+.TE
+.PP
+The
+EOF
+and
+EOLN
+bits here reflect the associated built-in function values.
+TEMP
+specifies that the file has a generated temporary name and that
+it should therefore be removed when its block exits.
+FREAD
+and
+FWRITE
+specify that
+.I reset
+and
+.I rewrite
+respectively have been done on the file so that
+input or output operations can be done.
+FTEXT
+specifies the file is a text file so that
+EOLN
+processing should be done,
+with newline characters turned into blanks, etc.
+.PP
+The
+SYNC
+bit,
+when true,
+specifies that there is no usable image in the file buffer window.
+As discussed in the
+.I "Berkeley Pascal User's Manual,"
+the interactive environment necessitates having
+``input^'' undefined at the beginning
+of execution so that a program may print a prompt
+before the user is required to type input.
+The
+SYNC
+bit implements this.
+When it is set,
+it specifies that the element in the window
+must be updated before it can be used.
+This is never done until necessary.
+.NH 2
+Initialization of files
+.PP
+All the variables in the Pascal runtime environment are cleared to zero on
+block entry.
+This is necessary for simple processing of files.
+If a file is unused, its pointer will be
+.B nil.
+All references to an inactive file are thus references through a
+.B nil
+pointer.
+If the Pascal system did not clear storage to zero before execution
+it would not be possible to detect inactive files in this simple way;
+it would probably be necessary to generate (possibly complicated)
+code to initialize
+each file on block entry.
+.PP
+When a file is first mentioned in a
+.I reset
+or
+.I rewrite
+call,
+a buffer of the form described above is associated with it,
+and the necessary information about the file is placed in this
+buffer.
+The file is also linked into the active file chain.
+This chain is kept sorted by block mark address, the
+FLEV
+entries.
+.NH 2
+Block exit
+.PP
+When block exit occurs the interpreter must free the files that are in
+use in the block
+and their associated buffers.
+This is simple and efficient because the files in the active file chain are
+sorted by increasing block mark address.
+This means that the files for the current block will be at the front
+of the chain.
+For each file that is no longer accessible
+the interpreter first flushes the file's buffer
+if it is an output file.
+The interpreter then returns the file buffer and the files structure and window
+to the free space in the heap and removes the file from the active file chain.
+.NH 2
+Flushing
+.PP
+Flushing all the file buffers at abnormal termination,
+or on a call to the procedure
+.I flush
+or
+.I message
+is done by flushing
+each file on the file chain that has the
+FWRITE
+bit set in its flags word.
+.NH 2
+The active file
+.PP
+For input-output,
+.I px
+maintains a notion of an active file.
+Each operation that references a file makes the file
+it will be using the active file and then does its operation.
+A subtle point here is that one may do a procedure call to
+.I write
+that involves a call to a function that references another file,
+thereby destroying the active file set up before the
+.I write.
+Thus the active file is saved at block entry
+in the block mark and restored at block exit.\*(Dg
+.FS
+\*(dg\ It would probably be better to dispense with the notion of
+active file and use another mechanism that did not involve extra
+overhead on each procedure and function call.
+.FE
+.NH 2
+File operations
+.PP
+Files in Pascal can be used in two distinct ways:
+as the object of
+.I read,
+.I write,
+.I get,
+and
+.I put
+calls, or indirectly as though they were pointers.
+The second use as pointers must be careful
+not to destroy the active file in a reference such as
+.LS
+write(output, input\(ua)
+.LE
+or the system would incorrectly write on the input device.
+.PP
+The fundamental operator related to the use of a file is
+.SM FNIL.
+This takes the file variable, as a pointer,
+ensures that the pointer is not
+.B nil,
+and also that a usable image is in the file window,
+by forcing the
+.SM SYNC
+bit to be cleared.
+.PP
+A simple example that demonstrates the use of the file operators
+is given by
+.LS
+writeln(f)
+.LE
+that produces
+.DS
+.mD
+.TS
+lp-2w(8) l.
+RV:\fIl f\fP
+UNIT
+WRITLN
+.TE
+.DE
+.NH 2
+Read operations
+.SH
+GET
+.IP
+Advance the active file to the next input element.
+.SH
+FNIL
+.IP
+A file pointer is on the stack. Ensure that the associated file is active
+and that the file is synced so that there is input available in the window.
+.SH
+READ*
+.IP
+If the file is a text file, read a block of text
+and convert it to the internal type of the specified
+operand. If the file is not a text file then
+do an unformatted read of the next record.
+The procedure
+.SM READLN
+reads up to and including the next end of line character.
+.SH
+READE A
+.IP
+The operator
+.SM READE
+reads a string name of an enumerated type and converts it
+to its internal value.
+.SM READE
+takes a pointer to a data structure as shown in figure 3.2.
+.so fig3.2.n
+See the description of
+.SM NAM
+in the next section for an example.
+.NH 2
+Write operations
+.SH
+PUT
+.IP
+Output the element in the active file window.
+.SH
+WRITEF s
+.IP
+The argument(s) on the stack are output
+by the
+.I fprintf
+standard
+.SM I/O
+library routine.
+The sub-opcode
+.I s
+specifies the number
+of longword arguments on the stack.
+.SH
+WRITEC
+.IP
+The character on the top of the stack is output
+without formatting. Formatted characters must be output with
+.SM WRITEF .
+.SH
+WRITES
+.IP
+The string specified by the pointer on the top of the stack is output
+by the
+.I fwrite
+standard
+.SM I/O
+library routine.
+All characters including nulls are printed.
+.SH
+WRITLN
+.IP
+A linefeed is output to the active file.
+The line-count for the file is
+incremented and checked against the line limit.
+.SH
+PAGE
+.IP
+A formfeed is output to the active file.
+.SH
+NAM A
+.IP
+The value on the top of the stack is converted to a pointer
+to an enumerated type string name.
+The address
+.SM A
+points to an enumerated type structure identical
+to that used by
+.SM READE .
+An error is raised if the value is out of range.
+The form of this structure for the predefined type
+.B boolean
+is shown in figure 3.3.
+.so fig3.3.n
+The code for
+.SM NAM
+is
+.DS
+.mD
+_NAM:
+ \fBincl\fR lc
+ \fBaddl3\fR (lc)+,ap,r6 #r6 points to scalar name list
+ \fBmovl\fR (sp)+,r3 #r3 has data value
+ \fBcmpw\fR r3,(r6)+ #check value out of bounds
+ \fBbgequ\fR enamrng
+ \fBmovzwl\fR (r6)[r3],r4 #r4 has string index
+ \fBpushab\fR (r6)[r4] #push string pointer
+ \fBjmp\fR (loop)
+enamrng:
+ \fBmovw\fR $ENAMRNG,_perrno
+ \fBjbr\fR error
+.DE
+The address of the table is calculated by adding the base address
+of the interpreter code,
+.I ap ,
+to the offset pointed to by
+.I lc .
+The first word of the table gives the number of records and
+provides a range check of the data to be output.
+The pointer is then calculated as
+.DS
+.mD
+tblbase = ap + A;
+size = *tblbase++;
+return(tblbase + tblbase[value]);
+.DE
+.SH
+MAX s,w
+.IP
+The sub-opcode
+.I s
+is subtracted from the integer on the top of the stack.
+The maximum of the result and the second argument,
+.I w ,
+replaces the value on the top of the stack.
+This function verifies that variable specified
+width arguments are non-negative, and meet certain minimum width
+requirements.
+.SH
+MIN s
+.IP
+The minimum of the value on the top of the stack
+and the sub-opcode replaces the value on the top
+of the stack.
+.sp 1
+.LP
+The uses of files and the file operations are summarized
+in an example which outputs a real variable (r) with a variable
+width field (i).
+.LS
+writeln('r =',r:i,' ',true);
+.LE
+that generates the code
+.DS
+.mD
+.TS
+lp-2w(8) l.
+UNITOUT
+FILE
+CON14:1
+CON14:3
+LVCON:4 "r ="
+WRITES
+RV8\fI:l r\fP
+RV4\fI:l i\fP
+MAX:8 1
+RV4\fI:l i\fP
+MAX:1 1
+LVCON:8 " %*.*E"
+FILE
+WRITEF:6
+CONC4 \' \'
+WRITEC
+CON14:1
+NAM \fIbool\fP
+LVCON:4 "%s"
+FILE
+WRITEF:3
+WRITLN
+.TE
+.DE
+.PP
+Here the operator
+.SM UNITOUT
+is an abbreviated form of the operator
+.SM UNIT
+that is used when the file to be made active is
+.I output .
+A file descriptor, record count, string size, and a pointer
+to the constant string ``r ='' are pushed
+and then output by
+.SM WRITES .
+Next the value of
+.I r
+is pushed on the stack
+and the precision size is calculated by taking
+eight less than the width, but not less than one.
+This is followed by the width that is reduced by
+one to leave space for the required leading blank.
+If the width is too narrow, it
+is expanded by
+.I fprintf .
+A pointer to the format string is pushed followed
+by a file descriptor and the operator
+.SM WRITEF
+that prints out
+.I r .
+The value of six on
+.SM WRITEF
+comes from two longs for
+.I r
+and a long each for the precision, width, format string pointer,
+and file descriptor.
+The operator
+.SM CONC4
+pushes the
+.I blank
+character onto a long on the stack that is then printed out by
+.SM WRITEC .
+The internal representation for
+.I true
+is pushed as a long onto the stack and is
+then replaced by a pointer to the string ``true''
+by the operator
+.SM NAM
+using the table
+.I bool
+for conversion.
+This string is output by the operator
+.SM WRITEF
+using the format string ``%s''.
+Finally the operator
+.SM WRITLN
+appends a newline to the file.
+.NH 2
+File activation and status operations
+.SH
+UNIT*
+.IP
+The file pointed to by the file pointer on the top
+of the stack is converted to be the active file.
+The opcodes
+.SM UNITINP
+and
+.SM UNITOUT
+imply standard input and output respectively
+instead of explicitly pushing their file pointers.
+.SH
+FILE
+.IP
+The standard
+.SM I/O
+library file descriptor associated with the active file
+is pushed onto the stack.
+.SH
+EOF
+.IP
+The file pointed to by the file pointer on the top
+of the stack is checked for end of file. A boolean
+is returned with
+.I true
+indicating the end of file condition.
+.SH
+EOLN
+.IP
+The file pointed to by the file pointer on the top
+of the stack is checked for end of line. A boolean
+is returned with
+.I true
+indicating the end of line condition.
+Note that only text files can check for end of line.
+.NH 2
+File housekeeping operations
+.SH
+DEFNAME
+.IP
+Four data items are passed on the stack;
+the size of the data type associated with the file,
+the maximum size of the file name,
+a pointer to the file name,
+and a pointer to the file variable.
+A file record is created with the specified window size
+and the file variable set to point to it.
+The file is marked as defined but not opened.
+This allows
+.B program
+statement association of file names with file variables
+before their use by a
+.SM RESET
+or a
+.SM REWRITE .
+.SH
+BUFF s
+.IP
+The sub-opcode is placed in the external variable
+.I _bufopt
+to specify the amount of I/O buffering that is desired.
+The current options are:
+.DS
+0 \- character at a time buffering
+1 \- line at a time buffering
+2 \- block buffering
+.DE
+The default value is 1.
+.SH
+RESET
+.br
+REWRITE
+.IP
+Four data items are passed on the stack;
+the size of the data type associated with the file,
+the maximum size of the name (possibly zero),
+a pointer to the file name (possibly null),
+and a pointer to the file variable.
+If the file has never existed it is created as in
+.SM DEFNAME .
+If no file name is specified and no previous name exists
+(for example one created by
+.SM DEFNAME )
+then a system temporary name is created.
+.SM RESET
+then opens the file for input, while
+.SM REWRITE
+opens the file for output.
+.sp 1
+.PP
+The three remaining file operations are
+.SM FLUSH
+that flushes the active file,
+.SM REMOVE
+that takes the pointer to a file name and removes the
+specified file, and
+.SM MESSAGE
+that flushes all the output files and sets the
+standard error file to be the active file.
diff --git a/share/doc/papers/px/pxin4.n b/share/doc/papers/px/pxin4.n
new file mode 100644
index 0000000..a4ee4b4
--- /dev/null
+++ b/share/doc/papers/px/pxin4.n
@@ -0,0 +1,67 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pxin4.n 5.2 (Berkeley) 4/17/91
+.\"
+.if !\n(xx .so tmac.p
+.nr H1 3
+.if n .ND
+.NH
+Conclusions
+.PP
+It is appropriate to consider,
+given the amount of time invested in rewriting the interpreter,
+whether the time was well spent, or whether a code-generator
+could have been written with an equivalent amount of effort.
+The Berkeley Pascal system is being modified to interface
+to the code generator of the portable C compiler with
+not much more work than was involved in rewriting
+.I px .
+However, this compiler will probably not supersede the interpreter
+in an instructional environment as the
+necessary loading and assembly processes will slow the
+compilation process to a noticeable degree.
+This effect will be further exaggerated because
+student users spend more time in compilation than in execution.
+Measurements over the course of a quarter at Berkeley with a mixture
+of students from beginning programming to upper division compiler
+construction show that the amount of time in compilation exceeds the amount
+of time spent in the interpreter, the ratio being approximately 60/40.
+.PP
+A more promising approach might have been a throw-away code generator
+such as was done for the
+.SM
+WATFIV
+.NL
+system.
+However, high-quality post-mortem and interactive
+debugging facilities become much more difficult to provide
+with such a code generator than in the interpreter environment.
diff --git a/share/doc/papers/px/table2.1.n b/share/doc/papers/px/table2.1.n
new file mode 100644
index 0000000..9f142d9
--- /dev/null
+++ b/share/doc/papers/px/table2.1.n
@@ -0,0 +1,83 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)table2.1.n 5.2 (Berkeley) 4/17/91
+.\"
+.DS L
+.TS
+box center;
+c s s
+c s s
+c c c
+n ap-2 a.
+Table 2.1 \- Operator Suffixes
+=
+.sp
+Unary operator suffixes
+.sp .1i
+Suffix Example Argument type
+2 NEG2 Short integer (2 bytes)
+4 SQR4 Long integer (4 bytes)
+8 ABS8 Real (8 bytes)
+.sp
+_
+.sp
+.T&
+c s s
+c c c
+n ap-2 a.
+Binary operator suffixes
+.sp .1i
+Suffix Example Argument type
+2 ADD2 Two short integers
+24 MUL24 Short above long integer
+42 REL42 Long above short integer
+4 DIV4 Two long integers
+28 DVD28 Short integer above real
+48 REL48 Long integer above real
+82 SUB82 Real above short integer
+84 MUL84 Real above long integer
+8 ADD8 Two reals
+.sp
+_
+.sp
+.T&
+c s s
+c c c
+n ap-2 a.
+Other Suffixes
+.sp .1i
+Suffix Example Argument types
+T ADDT Sets
+G RELG Strings
+.sp
+.TE
+.DE
diff --git a/share/doc/papers/px/table2.2.n b/share/doc/papers/px/table2.2.n
new file mode 100644
index 0000000..9a3f1db
--- /dev/null
+++ b/share/doc/papers/px/table2.2.n
@@ -0,0 +1,85 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)table2.2.n 5.2 (Berkeley) 4/17/91
+.\"
+.DS L
+.TS
+box center;
+c s
+c | c
+ci | aw(3.25i).
+Table 2.2 \- Inline data type codes
+_
+Code Description
+=
+a T{
+.fi
+An address offset is given in the word
+following the instruction.
+T}
+_
+A T{
+An address offset is given in the four bytes following the instruction.
+T}
+_
+l T{
+An index into the display
+is given in the sub-opcode.
+T}
+_
+r T{
+A relational operator is encoded in the sub-opcode. (see section 2.3)
+T}
+_
+s T{
+A small integer is
+placed in the sub-opcode, or in the next word
+if it is zero or too large.
+T}
+_
+v T{
+Variable length inline data.
+T}
+_
+w T{
+A word value in the following word.
+T}
+_
+W T{
+A long value in the following four bytes.
+T}
+_
+" T{
+An inline constant string.
+T}
+.TE
+.DE
diff --git a/share/doc/papers/px/table2.3.n b/share/doc/papers/px/table2.3.n
new file mode 100644
index 0000000..51796ae
--- /dev/null
+++ b/share/doc/papers/px/table2.3.n
@@ -0,0 +1,45 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)table2.3.n 5.2 (Berkeley) 4/17/91
+.\"
+.TS H
+box center;
+c s s
+lw(14) | lw(12) | lw(40)
+lp-2 | a | l.
+Table 2.3 \- Machine operations
+_
+Mnemonic Reference Description
+=
+.TH
+.so fig2.3.n
+.TE
diff --git a/share/doc/papers/px/table3.1.n b/share/doc/papers/px/table3.1.n
new file mode 100644
index 0000000..26db82e
--- /dev/null
+++ b/share/doc/papers/px/table3.1.n
@@ -0,0 +1,47 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)table3.1.n 5.2 (Berkeley) 4/17/91
+.\"
+.TS
+center;
+n l l.
+\-108 FNAME Text name of associated UNIX file
+\-30 LCOUNT Current count of lines output
+\-26 LLIMIT Maximum number of lines permitted
+\-22 FBUF UNIX FILE pointer
+\-18 FCHAIN Chain to next file
+\-14 FLEV Pointer to associated file variable
+\-10 PFNAME Pointer to name of file for error messages
+\-6 FUNIT File status flags
+\-4 FSIZE Size of elements in the file
+0 File window element
+.TE
diff --git a/share/doc/papers/px/tmac.p b/share/doc/papers/px/tmac.p
new file mode 100644
index 0000000..b6f381a
--- /dev/null
+++ b/share/doc/papers/px/tmac.p
@@ -0,0 +1,113 @@
+.\" Copyright (c) 1979 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)tmac.p 5.2 (Berkeley) 4/17/91
+.\"
+'if \n(FM=0 'so /usr/lib/tmac/tmac.s
+.if n .nr FM 1.2i
+.if t .tr *\(**=\(eq/\(sl+\(pl
+.bd S B 3
+.de mD
+.ta 8n 17n 42n
+..
+.de SM
+.if "\\$1"" .ps -2
+.if !"\\$1"" \s-2\\$1\s0\\$2
+..
+.de LG
+.if "\\$1"" .ps +2
+.if !"\\$1"" \s+2\\$1\s0\\$2
+..
+.de HP
+.nr pd \\n(PD
+.nr PD 0
+.if \\n(.$=0 .IP
+.if \\n(.$=1 .IP "\\$1"
+.if \\n(.$>=2 .IP "\\$1" "\\$2"
+.nr PD \\n(pd
+..
+.de ZP
+.nr pd \\n(PD
+.nr PD 0
+.PP
+.nr PD \\n(pd
+..
+.de LS \"LS - Literal display; ASCII DS
+.if \\n(.$=0 .DS
+.if \\n(.$=1 \\$1
+.if \\n(.$>1 \\$1 "\\$2"
+.if t .tr '\'`\`^\(ua-\(mi
+.if t .tr _\(ul
+..
+.de LE \"LE - End literal display
+.DE
+.tr ''``__--^^
+..
+.de UP
+Berkeley Pascal\\$1
+..
+.de PD
+\s-2PDP\s0
+.if \\n(.$=0 11/70
+.if \\n(.$>0 11/\\$1
+..
+.de DK
+Digital Equipment Corporation\\$1
+..
+.de PI
+.I pi \\$1
+..
+.de Xp
+.I Pxp \\$1
+..
+.de XP
+.I pxp \\$1
+..
+.de IX
+.I pix \\$1
+..
+.de X
+.I px \\$1
+..
+.de PX
+.I px \\$1
+..
+.if n .ds dg +
+.if t .ds dg \(dg
+.if n .ds Dg \*(dg
+.if t .ds Dg \v'-0.3m'\s-2\*(dg\s0\v'0.3m'
+.if n .ds dd *
+.if t .ds dd \(dd
+.if n .ds Dd \*(dd
+.if t .ds Dd \v'-0.3m'\s-2\*(dd\s0\v'0.3m'
+.if n .ds b \\fI
+.if t .ds b \\fB
+.nr xx 1
diff --git a/share/doc/papers/relengr/0.t b/share/doc/papers/relengr/0.t
new file mode 100644
index 0000000..7fb3290
--- /dev/null
+++ b/share/doc/papers/relengr/0.t
@@ -0,0 +1,91 @@
+.\" Copyright (c) 1989 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 5.1 (Berkeley) 4/17/91
+.\"
+.rm CM
+.nr PO 1.25i
+.ds CH "
+.ds CF "%
+.nr Fn 0 1
+.ds b3 4.3\s-1BSD\s+1
+.de KI
+.ds Lb "Fig. \\n+(Fn
+.KF
+.ce 1
+Figure \\n(Fn - \\$1.
+..
+.de SM
+\\s-1\\$1\\s+1\\$2
+..
+.de NM
+\&\fI\\$1\fP\\$2
+..
+.de RN
+\&\fI\\$1\fP\^(\^)\\$2
+..
+.de PN
+\&\fB\\$1\fP\\$2
+..
+.TL
+The Release Engineering of 4.3\s-1BSD\s0
+.AU
+Marshall Kirk McKusick
+.AU
+Michael J. Karels
+.AU
+Keith Bostic
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.AB
+This paper describes an approach used by a small group of people
+to develop and integrate a large software system.
+It details the development and release engineering strategy
+used during the preparation of the \*(b3 version of the UNIX\(dg
+.FS
+\(dgUNIX is a registered trademark of AT&T in the US and other countries.
+.FE
+operating system.
+Each release cycle is divided into an initial development phase
+followed by a release engineering phase.
+The release engineering of the distribution is done in three steps.
+The first step has an informal control policy for tracking modifications;
+it results in an alpha distribution.
+The second step has more rigid change mechanisms in place;
+it results in a beta release.
+During the final step changes are tracked very closely;
+the result is the final distribution.
+.AE
+.LP
diff --git a/share/doc/papers/relengr/1.t b/share/doc/papers/relengr/1.t
new file mode 100644
index 0000000..6fbe287
--- /dev/null
+++ b/share/doc/papers/relengr/1.t
@@ -0,0 +1,69 @@
+.\" Copyright (c) 1989 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 5.1 (Berkeley) 4/17/91
+.\"
+.NH
+Introduction
+.PP
+The Computer Systems Research Group (\c
+.SM CSRG )
+has always been a small group of software developers.
+This resource limitation requires careful software-engineering management
+as well as careful coordination of both
+.SM CSRG
+personnel and the members of the general community who
+contribute to the development of the system.
+.PP
+Releases from Berkeley alternate between those that introduce
+major new facilities and those that provide bug fixes and efficiency
+improvements.
+This alternation allows timely releases, while providing for refinement,
+tuning, and correction of the new facilities.
+The timely followup of ``cleanup'' releases reflects the importance
+.SM CSRG
+places on providing a reliable and robust system on which its
+user community can depend.
+.PP
+The development of the Berkeley Software Distribution (\c
+.SM BSD )
+illustrates an \fIadvantage\fP of having a few
+principal developers:
+the developers all understand the entire system thoroughly enough
+to be able to coordinate their own work with
+that of other people to produce a coherent final system.
+Companies with large development organizations find
+this result difficult to duplicate.
+This paper describes the process by which
+the development effort for \*(b3 was managed.
+.[
+design and implementation
+.]
diff --git a/share/doc/papers/relengr/2.t b/share/doc/papers/relengr/2.t
new file mode 100644
index 0000000..0c3ce8c
--- /dev/null
+++ b/share/doc/papers/relengr/2.t
@@ -0,0 +1,146 @@
+.\" Copyright (c) 1989 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 5.1 (Berkeley) 4/17/91
+.\"
+.NH
+System Development
+.PP
+The first phase of each Berkeley system is its development.
+.SM CSRG
+maintains a continuously evolving list of projects that are candidates
+for integration into the system.
+Some of these are prompted by emerging ideas from the research world,
+such as the availability of a new technology, while other additions
+are suggested by the commercial world, such as the introduction of
+new standards like
+.SM POSIX ,
+and still other projects are emergency responses to situations like
+the Internet Worm.
+.PP
+These projects are ordered based on the perceived benefit of the
+project as opposed to its difficulty;
+the most important are selected for inclusion in each new release.
+Often there is a prototype available from a group outside
+.SM CSRG .
+Because of the limited staff at
+.SM CSRG ,
+this prototype is obtained to use as a starting base
+for integration into the
+.SM BSD
+system.
+Only if no prototype is available is the project begun in-house.
+In either case, the design of the facility is forced to conform to the
+.SM CSRG
+style.
+.PP
+Unlike other development groups, the staff of
+.SM CSRG
+specializes by projects rather than by particular parts
+of the system;
+a staff person will be responsible for all aspects of a project.
+This responsibility starts at the associated kernel device drivers;
+it proceeds up through the rest of the kernel,
+through the C library and system utility programs,
+ending at the user application layer.
+This staff person is also responsible for related documentation,
+including manual pages.
+Many projects proceed in parallel,
+interacting with other projects as their paths cross.
+.PP
+All source code, documentation, and auxiliary files are kept
+under a source code control system.
+During development,
+this control system is critical for notifying people
+when they are colliding with other ongoing projects.
+Even more important, however,
+is the audit trail maintained by the control system that
+is critical to the release engineering phase of the project
+described in the next section.
+.PP
+Much of the development of
+.SM BSD
+is done by personnel that are located at other institutions.
+Many of these people not only have interim copies of the release
+running on their own machines,
+but also have user accounts on the main development
+machine at Berkeley.
+Such users are commonly found logged in at Berkeley over the
+Internet, or sometimes via telephone dialup, from places as far away
+as Massachusetts or Maryland, as well as from closer places, such as
+Stanford.
+For the \*(b3 release,
+certain users had permission to modify the master copy of the
+system source directly.
+People given access to the master sources
+are carefully screened beforehand,
+but are not closely supervised.
+Their work is checked at the end of the beta-test period by
+.SM CSRG
+personnel who back out inappropriate changes.
+Several facilities, including the
+Fortran and C compilers,
+as well as important system programs, for example,
+.PN telnet
+and
+.PN ftp ,
+include significant contributions from people who did not work
+directly for
+.SM CSRG .
+One important exception to this approach is that changes to the kernel
+are made only by
+.SM CSRG
+personnel, although the changes are often suggested by the larger community.
+.PP
+The development phase continues until
+.SM CSRG
+decides that it is appropriate to make a release.
+The decision to halt development and transition to release mode
+is driven by several factors.
+The most important is that enough projects have been completed
+to make the system significantly superior to the previously released
+version of the system.
+For example,
+\*(b3 was released primarily because of the need for
+the improved networking capabilities and the markedly
+improved system performance.
+Of secondary importance is the issue of timing.
+If the releases are too infrequent, then
+.SM CSRG
+will be inundated with requests for interim releases.
+Conversely,
+if systems are released too frequently,
+the integration cost for many vendors will be too high,
+causing them to ignore the releases.
+Finally,
+the process of release engineering is long and tedious.
+Frequent releases slow the rate of development and
+cause undue tedium to the staff.
diff --git a/share/doc/papers/relengr/3.t b/share/doc/papers/relengr/3.t
new file mode 100644
index 0000000..8d89ded
--- /dev/null
+++ b/share/doc/papers/relengr/3.t
@@ -0,0 +1,390 @@
+.\" Copyright (c) 1989 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 5.1 (Berkeley) 4/17/91
+.\"
+.NH
+System Release
+.PP
+Once the decision has been made to halt development
+and begin release engineering,
+all currently unfinished projects are evaluated.
+This evaluation involves weighing the time required to complete
+the project against how important the project is to the
+upcoming release.
+Projects that are not selected for completion are
+removed from the distribution branch of the source code control system
+and saved on branch deltas so they can be retrieved,
+completed, and merged into a future release;
+the remaining unfinished projects are brought to orderly completion.
+.PP
+Developments from
+.SM CSRG
+are released in three steps: alpha, beta, and final.
+Alpha and beta releases are not true distributions\(emthey
+are test systems.
+Alpha releases are normally available to only a few sites,
+usually those working closely with
+.SM CSRG .
+More sites are given beta releases,
+as the system is closer to completion,
+and needs wider testing to find more obscure problems.
+For example, \*(b3 alpha was distributed to about fifteen
+sites, while \*(b3 beta ran at more than a hundred.
+.NH 2
+Alpha Distribution Development
+.PP
+The first step in creating an alpha distribution is to evaluate the
+existing state of the system and to decide what software should be
+included in the release.
+This decision process includes deciding not only what software should
+be added, but also what obsolete software ought to be retired from the
+distribution.
+The new software includes the successful projects that have been
+completed at
+.SM CSRG
+and elsewhere, as well as some portion of the vast quantity of
+contributed software that has been offered during the development
+period.
+.PP
+Once an initial list has been created,
+a prototype filesystem corresponding to the distribution
+is constructed, typically named
+.PN /nbsd .
+This prototype will eventually turn into the master source tree for the
+final distribution.
+During the period that the alpha distribution is being created,
+.PN /nbsd
+is mounted read-write, and is highly fluid.
+Programs are created and deleted,
+old versions of programs are completely replaced,
+and the correspondence between the sources and binaries
+is only loosely tracked.
+People outside
+.SM CSRG
+who are helping with the distribution are free to
+change their parts of the distribution at will.
+.PP
+During this period the newly forming distribution is
+checked for interoperability.
+For example,
+in \*(b3 the output of context differences from
+.PN diff
+was changed to merge overlapping sections.
+Unfortunately, this change broke the
+.PN patch
+program which could no longer interpret the output of
+.PN diff .
+Since the change to
+.PN diff
+and the
+.PN patch
+program had originated outside Berkeley,
+.SM CSRG
+had to coordinate the efforts of the respective authors
+to make the programs work together harmoniously.
+.PP
+Once the sources have stabilized,
+an attempt is made to compile the entire source tree.
+Often this exposes errors caused by changed header files,
+or use of obsoleted C library interfaces.
+If the incompatibilities affect too many programs,
+or require excessive amounts of change in the programs
+that are affected,
+the incompatibility is backed out or some backward-compatible
+interface is provided.
+The incompatibilities that are found and left in are noted
+in a list that is later incorporated into the release notes.
+Thus, users upgrading to the new system can anticipate problems
+in their own software that will require change.
+.PP
+Once the source tree compiles completely,
+it is installed and becomes the running system that
+.SM CSRG
+uses on its main development machine.
+Once in day-to-day use,
+other interoperability problems become apparent
+and are resolved.
+When all known problems have been resolved, and the system has been
+stable for some period of time, an alpha distribution tape is made
+from the contents of
+.PN /nbsd .
+.PP
+The alpha distribution is sent out to a small set of test sites.
+These test sites are selected as having a
+sophisticated user population, capable not only of finding bugs,
+but also of determining their cause and developing a fix for the problem.
+These sites are usually composed of groups that are contributing
+software to the distribution or groups that have a particular expertise
+with some portion of the system.
+.NH 2
+Beta Distribution Development
+.PP
+After the alpha tape is created,
+the distribution filesystem is mounted read-only.
+Further changes are requested in a change log rather than
+being made directly to the distribution.
+The change requests are inspected and implemented by a
+.SM CSRG
+staff person, followed by a compilation of the affected
+programs to ensure that they still build correctly.
+Once the alpha tape has been cut,
+changes to the distribution are no longer made by people outside
+.SM CSRG .
+.PP
+As the alpha sites install and begin running the alpha distribution,
+they monitor the problems that they encounter.
+For minor bugs, they typically report back the bug along with
+a suggested fix.
+Since many of the alpha sites are selected from among the people
+working closely with
+.SM CSRG ,
+they often have accounts on, and access to, the primary
+.SM CSRG
+development machine.
+Thus, they are able to directly install the fix themselves,
+and simply notify
+.SM CSRG
+when they have fixed the problem.
+After the fix has been verified, the affected files are added to
+the list to be updated on
+.PN /nbsd .
+.PP
+The more important task of the alpha sites is to test out the
+new facilities that have been added to the system.
+The alpha sites often find major design flaws
+or operational shortcomings of the facilities.
+When such problems are found,
+the person in charge of that facility is responsible
+for resolving the problem.
+Occasionally this requires redesigning and reimplementing
+parts of the affected facility.
+For example,
+in 4.2\s-1BSD\s+1,
+the alpha release of the networking system did not have connection queueing.
+This shortcoming prevented the network from handling many
+connections to a single server.
+The result was that the networking interface had to be
+redesigned to provide this functionality.
+.PP
+The alpha sites are also responsible for ferreting out interoperability
+problems between different utilities.
+The user populations of the test sites differ from the user population at
+.SM CSRG ,
+and, as a result, the utilities are exercised in ways that differ
+from the ways that they are used at
+.SM CSRG .
+These differences in usage patterns turn up problems that
+do not occur in our initial test environment.
+.PP
+The alpha sites frequently redistribute the alpha tape to several
+of their own alpha sites that are particularly interested
+in parts of the new system.
+These additional sites are responsible for reporting
+problems back to the site from which they received the distribution,
+not to
+.SM CSRG .
+Often these redistribution sites are less sophisticated than the
+direct alpha sites, so their reports need to be filtered
+to avoid spurious, or site-dependent, bug reports.
+The direct alpha sites sift through the reports to find those that
+are relevant, and usually verify the suggested fix if one is given,
+or develop a fix if none is provided.
+This hierarchical testing process forces
+bug reports, fixes, and new software
+to be collected, evaluated, and checked for inaccuracies
+by first-level sites before being forwarded to
+.SM CSRG ,
+allowing the developers at
+.SM CSRG
+to concentrate on tracking the changes being made to the system
+rather than sifting through information (often voluminous) from every
+alpha-test site.
+.PP
+Once the major problems have been attended to,
+the focus turns to getting the documentation synchronized
+with the code that is being shipped.
+The manual pages need to be checked to be sure that
+they accurately reflect any changes to the programs that
+they describe.
+Usually the manual pages are kept up to date as
+the program they describe evolves.
+However, the supporting documents frequently do not get changed,
+and must be edited to bring them up to date.
+During this review, the need for other documents becomes evident.
+For example, it was
+during this phase of \*(b3 that it was decided
+to add a tutorial document on how to use the socket
+interprocess communication primitives.
+.PP
+Another task during this period is to contact the people that
+have contributed complete software packages
+(such as
+.PN RCS
+or
+.PN MH )
+in previous releases to see if they wish to
+make any revisions to their software.
+For those who do,
+the new software has to be obtained,
+and tested to verify that it compiles and runs
+correctly on the system to be released.
+Again, this integration and testing can often be done by the
+contributors themselves by logging directly into the master machine.
+.PP
+After the stream of bug reports has slowed down
+to a reasonable level,
+.SM CSRG
+begins a careful review of all the changes to the
+system since the previous release.
+The review is done by running a recursive
+.PN diff
+of the entire source tree\(emhere, of
+.PN /nbsd
+with 4.2\s-1BSD\s+1.
+All the changes are checked to ensure that they are reasonable,
+and have been properly documented.
+The process often turns up questionable changes.
+When such a questionable change is found,
+the source code control system log is examined to find
+out who made the change and what their explanation was
+for the change.
+If the log does not resolve the problem,
+the person responsible for the change is asked for an explanation
+of what they were trying to accomplish.
+If the reason is not compelling,
+the change is backed out.
+Facilities deemed inappropriate in \*(b3 included new options to
+the directory-listing command and a changed return value for the
+.RN fseek
+library routine;
+the changes were removed from the source before final distribution.
+Although this process is long and tedious,
+it forces the developers to obtain a coherent picture of the entire set of
+changes to the system.
+This exercise often turns up inconsistencies that would
+otherwise never be found.
+.PP
+The outcome of the comparison is
+a pair of documents detailing
+changes to every user-level command
+.[
+Bug Fixes and Changes
+.]
+and to every kernel source file.
+.[
+Changes to the Kernel
+.]
+These documents are delivered with the final distribution.
+A user can look up any command by name and see immediately
+what has changed,
+and a developer can similarly look up any kernel
+file by name and get a summary of the changes to that file.
+.PP
+Having completed the review of the entire system,
+the preparation of the beta distribution is started.
+Unlike the alpha distribution, where pieces of the system
+may be unfinished and the documentation incomplete,
+the beta distribution is put together as if it were
+going to be the final distribution.
+All known problems are fixed, and any remaining development
+is completed.
+Once the beta tape has been prepared,
+no further changes are permitted to
+.PN /nbsd
+without careful review,
+as spurious changes made after the system has been
+.PN diff ed
+are unlikely to be caught.
+.NH 2
+Final Distribution Development
+.PP
+The beta distribution goes to more sites than the
+alpha distribution for three main reasons.
+First, as it is closer to the final release, more sites are willing
+to run it in a production environment without fear of catastrophic failures.
+Second, more commercial sites delivering
+.SM BSD -\c
+derived systems are interested in getting a preview of the
+upcoming changes in preparation for merging them into their
+own systems.
+Finally, because the beta tape has fewer problems,
+it is beneficial to offer it to more sites in hopes of
+finding as many of the remaining problems as possible.
+Also, by handing the system out to less sophisticated sites,
+issues that would be ignored by the users of the alpha sites
+become apparent.
+.PP
+The anticipation is that the beta tape will not require
+extensive changes to either the programs or the documentation.
+Most of the work involves sifting through the reported bugs
+to find those that are relevant and devising the minimal
+reasonable set of changes to fix them.
+After the fix has been thoroughly tested, it is listed in the update log for
+.PN /nbsd .
+One person at
+.SM CSRG
+is responsible for doing the update of
+.PN /nbsd
+and ensuring that everything affected by the change is rebuilt and tested.
+Thus, a change to a C library routine requires that the entire
+system be rebuilt.
+.PP
+During this period, the documentation is all printed and proofread.
+As minor changes are made to the manual pages and documentation,
+the affected pages must be reprinted.
+.PP
+The final step in the release process is to check the distribution tree
+to ensure that it is in a consistent state.
+This step includes verification that every file and directory
+on the distribution has the proper owner, group, and modes.
+All source files must be checked to be sure that they have
+appropriate copyright notices and source code control system headers.
+Any extraneous files must be removed.
+Finally, the installed binaries must be checked to ensure that they correspond
+exactly to the sources and libraries that are on the distribution.
+.PP
+This checking is a formidable task given that there are over 20,000 files on
+a typical distribution.
+Much of the checking can be done by a set of programs set to scan
+over the distribution tree.
+Unfortunately, the exception list is long, and requires
+hours of tedious hand checking; this has caused
+.SM CSRG
+to develop even
+more comprehensive validation programs for use in our next release.
+.PP
+Once the final set of checks has been run,
+the master tape can be made, and the official distribution started.
+As for the staff of
+.SM CSRG ,
+we usually take a brief vacation before plunging back into
+a new development phase.
diff --git a/share/doc/papers/relengr/Makefile b/share/doc/papers/relengr/Makefile
new file mode 100644
index 0000000..506fa7a
--- /dev/null
+++ b/share/doc/papers/relengr/Makefile
@@ -0,0 +1,12 @@
+# @(#)Makefile 1.6 (Berkeley) 6/8/93
+
+DIR= papers/relengr
+SRCS= 0.t 1.t 2.t 3.t
+MACROS= -ms
+EXTRA= ref.bib tmac.srefs
+REFER= /a/staff/mckusick/book/ref/refer -m -n -e -l -s -p ref.bib
+
+paper.ps: ${SRCS}
+ ${REFER} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/relengr/ref.bib b/share/doc/papers/relengr/ref.bib
new file mode 100644
index 0000000..6f33cd7
--- /dev/null
+++ b/share/doc/papers/relengr/ref.bib
@@ -0,0 +1,26 @@
+%A M. K. McKusick
+%A J. M. Bloom
+%A M. J. Karels
+%T Bug Fixes and Changes in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 12:1\-22
+%D 1986
+
+%A M. J. Karels
+%T Changes to the Kernel in 4.3BSD
+%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
+%I \s-1USENIX\s0 Association
+%C Berkeley, CA
+%P 13:1\-32
+%D 1986
+
+%A S. J. Leffler
+%A M. K. McKusick
+%A M. J. Karels
+%A J. S. Quarterman
+%T The Design and Implementation of the 4.3BSD UNIX Operating System
+%I Addison-Wesley
+%C Reading, MA
+%D 1989
diff --git a/share/doc/papers/relengr/ref.bib.ig b/share/doc/papers/relengr/ref.bib.ig
new file mode 100644
index 0000000..fb24c6e
--- /dev/null
+++ b/share/doc/papers/relengr/ref.bib.ig
@@ -0,0 +1,3 @@
+ref.bib:0,249 mckusi bloom karels bug fixes change system manage manual berkel softwa distri virtua vax versio associ berkel 1986
+ref.bib:249,216 karels change kernel system manage manual berkel softwa distri virtua vax versio associ berkel 1986
+ref.bib:465,181 leffle mckusi karels quarte design implem unix operat system addiso wesley readin 1989
diff --git a/share/doc/papers/relengr/spell.ok b/share/doc/papers/relengr/spell.ok
new file mode 100644
index 0000000..13f5cf8
--- /dev/null
+++ b/share/doc/papers/relengr/spell.ok
@@ -0,0 +1,15 @@
+BSD
+Bostic
+CH
+CM
+CSRG
+Fn
+Karels
+Lb
+McKusick
+POSIX
+editted
+filesystem
+followup
+mothballed
+nbsd
diff --git a/share/doc/papers/relengr/tmac.srefs b/share/doc/papers/relengr/tmac.srefs
new file mode 100644
index 0000000..889e3fe
--- /dev/null
+++ b/share/doc/papers/relengr/tmac.srefs
@@ -0,0 +1,179 @@
+.\" @(#)tmac.srefs 1.14 11/2/88
+.\" REFER macros .... citations
+.de []
+.][ \\$1
+..
+.de ][
+.if \\$1>5 .tm Bad arg to []
+.[\\$1
+..
+.if n .ds [. [
+.\".if t .ds [. \s-2\v'-.4m'\f1
+.if t .ds [. [
+.if n .ds .] ]
+.\".if t .ds .] \v'.4m'\s+2\fP
+.if t .ds .] ]
+.ds (. \& [
+.ds .) ]
+.if n .ds [o ""
+.if n .ds [c ""
+.if t .ds [o ``
+.if t .ds [c ''
+.ds [e \\fIet al.\\fP
+.\" for author list in reference:
+.ds &1 &
+.\" for -m signal (auth1 and auth2, year):
+.ds &2 &
+.\" the next lines deal with the problem of .[1] or [1].
+.\" refer will write "linexxx\*(<.[1]\*(>."
+.\" and either "<." or ">." should produce the .;
+.\" similarly for , and ;
+.rm <. <, <;
+.if n .ds >. .
+.if t .ds >. .
+.if n .ds >, ,
+.if t .ds >, ,
+.if n .ds >; ;
+.if t .ds >; ;
+.de [5 \" tm style
+.FS
+.IP "\\*([F.\0"
+\\*([A, \\f2\\*([T\\f1,
+.ie \\n(TN \\*([M.
+.el Bell Laboratories internal memorandum (\\*([D).
+.RT
+.FE
+..
+.de [0 \" other
+.FS
+.nr [: 0
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \{.nr [: 1
+\\*([A\c\}
+.if !"\\*([T"" \{.if \\n([:>0 ,
+.nr [: 1
+\\f2\\*([T\\f1\c\}
+.if !"\\*([O""\{.if \\n([:>0 ,
+.nr [: 1
+.if \\n([O>0 .nr [: 0
+\\*([O\c
+.if \\n([O>0 \& \c\}
+.ie !"\\*([D"" \{.if \\n([:>0 ,
+.nr [: 1
+\\*([D\c\}
+.if \\n([:>0 \&.
+.RT
+.FE
+..
+.de [1 \" journal article
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+\\f2\\*([J\\f1\c
+.if !"\\*([V"" .if n \& Vol.\&\c
+.if !"\\*([V"" \& \\f3\\*([V\\f1\c
+.if !"\\*([N"" (\\*([N)\c
+.if !"\\*([P"" \{\
+.ie \\n([P>0 , pp. \c
+.el , p. \c
+\\*([P\c\}
+.if !"\\*([I"" .if "\\*([R"" , \\*([I\c
+.if !"\\*([O"" .if \\n([O=0 , \\*([O\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" .if \\n([O>0 \\*([O
+.RT
+.FE
+..
+.de [2 \" book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\f2\\*([T,\\f1
+\\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([G"" Gov't. ordering no. \\*([G.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [4 \" report
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+\\*([A, \\*([o\\*([T,\\*([c
+\\*([R\c
+.if !"\\*([G"" \& (\\*([G)\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de [3 \" article in book
+.FS
+.if !"\\*([F"" .IP "\\*([F.\0"
+.if !"\\*([A"" \\*([A,
+.if !"\\*([T"" \\*([o\\*([T,\\*([c
+.if !"\\*([P"" pp. \\*([P
+in \\f2\\*([B\\f1\c
+.if !"\\*([E"" , ed. \\*([E\c
+.if !"\\*([I"" , \\*([I\c
+.if !"\\*([C"" , \\*([C\c
+.if !"\\*([D"" \& (\\*([D)\c
+\&.
+.if !"\\*([O"" \\*([O
+.RT
+.FE
+..
+.de ]<
+.[<
+..
+.de [<
+.RT
+.ne 62p
+.ie \\n(rS \{\
+. rs
+. sp 4p
+.\}
+.el .sp 27p
+.po -2.5P
+.Li 2 30.5P
+\\s11\fBReferences\fP\s10
+.br
+.if \\n(Ns<2 \{\
+. nr Ns 1
+. ds ST References
+.\}
+.\"nr Tt 7
+.po
+.sp 8p
+.rm FS FE
+.\"sy echo '.T3 "\\\\t\\\\tReferences" \\n%' >>Toc
+.ns
+..
+.de [>
+.]>
+..
+.de ]>
+.sp
+..
+.de ]-
+.[-
+..
+.de [-
+.rm [V [P [A [T
+.rm [N [C [B [O
+.rm [R [I [E [D
+..
+.de ]]
+this is never
+executed
+and just
+uses up an end-of-file
+bug.
+..
diff --git a/share/doc/papers/sysperf/0.t b/share/doc/papers/sysperf/0.t
new file mode 100644
index 0000000..0c27a34
--- /dev/null
+++ b/share/doc/papers/sysperf/0.t
@@ -0,0 +1,247 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 5.1 (Berkeley) 4/17/91
+.\"
+.if n .ND
+.TL
+Measuring and Improving the Performance of Berkeley UNIX*
+.sp
+April 17, 1991
+.AU
+Marshall Kirk McKusick,
+Samuel J. Leffler\(dg,
+Michael J. Karels
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, CA 94720
+.AB
+.FS
+* UNIX is a trademark of AT&T Bell Laboratories.
+.FE
+.FS
+\(dg Samuel J. Leffler is currently employed by:
+Silicon Graphics, Inc.
+.FE
+.FS
+This work was done under grants from
+the National Science Foundation under grant MCS80-05144,
+and the Defense Advanced Research Projects Agency (DoD) under
+ARPA Order No. 4031 monitored by Naval Electronic System Command under
+Contract No. N00039-82-C-0235.
+.FE
+The 4.2 Berkeley Software Distribution of
+.UX
+for the VAX\(dd
+.FS
+\(dd VAX, MASSBUS, UNIBUS, and DEC are trademarks of
+Digital Equipment Corporation.
+.FE
+had several problems that could severely affect the overall
+performance of the system.
+These problems were identified with
+kernel profiling and system tracing during day-to-day use.
+Once potential problem areas had been identified,
+benchmark programs were devised to highlight the bottlenecks.
+These benchmarks verified that the problems existed and provided
+a metric against which to validate proposed solutions.
+This paper examines
+the performance problems encountered and describes
+modifications that have been made
+to the system since the initial distribution.
+.PP
+The changes to the system have consisted of improvements to the
+performance of the existing facilities,
+as well as enhancements to the current facilities.
+Performance improvements in the kernel include cacheing of path name
+translations, reductions in clock handling and scheduling overhead,
+and improved throughput of the network subsystem.
+Performance improvements in the libraries and utilities include replacement of
+linear searches of system databases with indexed lookup,
+merging of most network services into a single daemon,
+and conversion of system utilities to use the more efficient
+facilities available in 4.2BSD.
+Enhancements in the kernel include the addition of subnets and gateways,
+increases in many kernel limits,
+cleanup of the signal and autoconfiguration implementations,
+and support for windows and system logging.
+Functional extensions in the libraries and utilities include
+the addition of an Internet name server,
+new system management tools,
+and extensions to \fIdbx\fP to work with Pascal.
+The paper concludes with a brief discussion of changes made to
+the system to enhance security.
+All of these enhancements are present in Berkeley UNIX 4.3BSD.
+.AE
+.LP
+.sp 2
+CR Categories and Subject Descriptors:
+D.4.3
+.B "[Operating Systems]":
+File Systems Management \-
+.I "file organization, directory structures, access methods";
+D.4.8
+.B "[Operating Systems]":
+Performance \-
+.I "measurements, operational analysis";
+.sp
+Additional Keywords and Phrases:
+Berkeley UNIX,
+system performance,
+application program interface.
+.sp
+General Terms:
+UNIX operating system,
+measurement,
+performance.
+.de PT \" page-title macro: sets the title length from register LL, makes % the page-number character, copies the current page number into PN, prints a three-part title from strings LH, CH and RH, then resets the title length to the current line length
+.lt \\n(LLu
+.pc %
+.nr PN \\n%
+.tl '\\*(LH'\\*(CH'\\*(RH'
+.lt \\n(.lu
+..
+.af PN i
+.ds LH Performance
+.ds RH Contents
+.bp 1
+.if t .ds CF April 17, 1991
+.if t .ds LF DRAFT
+.if t .ds RF McKusick, et al.
+.ce
+.B "TABLE OF CONTENTS"
+.LP
+.sp 1
+.nf
+.B "1. Introduction"
+.LP
+.sp .5v
+.nf
+.B "2. Observation techniques"
+\0.1. System maintenance tools
+\0.2. Kernel profiling
+\0.3. Kernel tracing
+\0.4. Benchmark programs
+.LP
+.sp .5v
+.nf
+.B "3. Results of our observations"
+\0.1. User programs
+\0.1.1. Mail system
+\0.1.2. Network servers
+\0.2. System overhead
+\0.2.1. Micro-operation benchmarks
+\0.2.2. Path name translation
+\0.2.3. Clock processing
+\0.2.4. Terminal multiplexors
+\0.2.5. Process table management
+\0.2.6. File system buffer cache
+\0.2.7. Network subsystem
+\0.2.8. Virtual memory subsystem
+.LP
+.sp .5v
+.nf
+.B "4. Performance Improvements"
+\0.1. Performance Improvements in the Kernel
+\0.1.1. Name Cacheing
+\0.1.2. Intelligent Auto Siloing
+\0.1.3. Process Table Management
+\0.1.4. Scheduling
+\0.1.5. Clock Handling
+\0.1.6. File System
+\0.1.7. Network
+\0.1.8. Exec
+\0.1.9. Context Switching
+\0.1.10. Setjmp and Longjmp
+\0.1.11. Compensating for Lack of Compiler Technology
+\0.2. Improvements to Libraries and Utilities
+\0.2.1. Hashed Databases
+\0.2.2. Buffered I/O
+\0.2.3. Mail System
+\0.2.4. Network Servers
+\0.2.5. The C Run-time Library
+\0.2.6. Csh
+.LP
+.sp .5v
+.nf
+.B "5. Functional Extensions"
+\0.1. Kernel Extensions
+\0.1.1. Subnets, Broadcasts, and Gateways
+\0.1.2. Interface Addressing
+\0.1.3. User Control of Network Buffering
+\0.1.4. Number of File Descriptors
+\0.1.5. Kernel Limits
+\0.1.6. Memory Management
+\0.1.7. Signals
+\0.1.8. System Logging
+\0.1.9. Windows
+\0.1.10. Configuration of UNIBUS Devices
+\0.1.11. Disk Recovery from Errors
+\0.2. Functional Extensions to Libraries and Utilities
+\0.2.1. Name Server
+\0.2.2. System Management
+\0.2.3. Routing
+\0.2.4. Compilers
+.LP
+.sp .5v
+.nf
+.B "6. Security Tightening"
+\0.1. Generic Kernel
+\0.2. Security Problems in Utilities
+.LP
+.sp .5v
+.nf
+.B "7. Conclusions"
+.LP
+.sp .5v
+.nf
+.B Acknowledgements
+.LP
+.sp .5v
+.nf
+.B References
+.LP
+.sp .5v
+.nf
+.B "Appendix \- Benchmark Programs"
+.de _d \" sets three tab stops, with one set of positions for troff (.if t) and a wider set for nroff (.if n)
+.if t .ta .6i 2.1i 2.6i
+.\" 2.94 went to 2.6, 3.64 to 3.30
+.if n .ta .84i 2.6i 3.30i
+..
+.de _f \" sets three tab stops, with one set of positions for troff (.if t) and a wider set for nroff (.if n)
+.if t .ta .5i 1.25i 2.5i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i
+..
diff --git a/share/doc/papers/sysperf/1.t b/share/doc/papers/sysperf/1.t
new file mode 100644
index 0000000..88608ee
--- /dev/null
+++ b/share/doc/papers/sysperf/1.t
@@ -0,0 +1,81 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Introduction
+.af PN 1
+.bp 1
+.NH
+Introduction
+.PP
+The Berkeley Software Distributions of
+.UX
+for the VAX have added many new capabilities that were
+previously unavailable under
+.UX .
+The development effort for 4.2BSD concentrated on providing new
+facilities, and in getting them to work correctly.
+Many new data structures were added to the system to support
+these new capabilities.
+In addition,
+many of the existing data structures and algorithms
+were put to new uses or their old functions placed under increased demand.
+The effect of these changes was that
+mechanisms that were well tuned under 4.1BSD
+no longer provided adequate performance for 4.2BSD.
+The increased user feedback that came with the release of
+4.2BSD and a growing body of experience with the system
+highlighted the performance shortcomings of 4.2BSD.
+.PP
+This paper details the work that we have done since
+the release of 4.2BSD to measure the performance of the system,
+detect the bottlenecks,
+and find solutions to remedy them.
+Most of our tuning has been in the context of the real
+timesharing systems in our environment.
+Rather than using simulated workloads,
+we have sought to analyze our tuning efforts under
+realistic conditions.
+Much of the work has been done in the machine independent parts
+of the system, hence these improvements could be applied to
+other variants of UNIX with equal success.
+All of the changes made have been included in 4.3BSD.
+.PP
+Section 2 of the paper describes the tools and techniques
+available to us for measuring system performance.
+In Section 3 we present the results of using these tools, while Section 4
+has the performance improvements
+that have been made to the system based on our measurements.
+Section 5 highlights the functional enhancements that have
+been made to Berkeley UNIX 4.2BSD.
+Section 6 discusses some of the security problems that
+have been addressed.
diff --git a/share/doc/papers/sysperf/2.t b/share/doc/papers/sysperf/2.t
new file mode 100644
index 0000000..703cbb6
--- /dev/null
+++ b/share/doc/papers/sysperf/2.t
@@ -0,0 +1,258 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Observation techniques
+.NH
+Observation techniques
+.PP
+There are many tools available for monitoring the performance
+of the system.
+Those that we found most useful are described below.
+.NH 2
+System maintenance tools
+.PP
+Several standard maintenance programs are invaluable in
+observing the basic actions of the system.
+The \fIvmstat\fP(1)
+program is designed to be an aid to monitoring
+systemwide activity. Together with the
+\fIps\fP\|(1)
+command (as in ``ps av''), it can be used to investigate systemwide
+virtual memory activity.
+By running \fIvmstat\fP
+when the system is active you can judge the system activity in several
+dimensions: job distribution, virtual memory load, paging and swapping
+activity, disk and cpu utilization.
+Ideally, to have a balanced system in activity,
+there should be few blocked (b) jobs,
+there should be little paging or swapping activity, there should
+be available bandwidth on the disk devices (most single arms peak
+out at 25-35 tps in practice), and the user cpu utilization (us) should
+be high (above 50%).
+.PP
+If the system is busy, then the count of active jobs may be large,
+and several of these jobs may often be blocked (b). If the virtual
+memory is active, then the paging daemon will be running (sr will
+be non-zero). It is healthy for the paging daemon to free pages when
+the virtual memory gets active; it is triggered by the amount of free
+memory dropping below a threshold and increases its pace as free memory
+goes to zero.
+.PP
+If you run \fIvmstat\fP
+when the system is busy (a ``vmstat 5'' gives all the
+numbers computed by the system), you can find
+imbalances by noting abnormal job distributions. If many
+processes are blocked (b), then the disk subsystem
+is overloaded or imbalanced. If you have several non-dma
+devices or open teletype lines that are ``ringing'', or user programs
+that are doing high-speed non-buffered input/output, then the system
+time may go high (60-80% or higher).
+It is often possible to pin down the cause of high system time by
+looking to see if there is excessive context switching (cs), interrupt
+activity (in) or system call activity (sy). Long term measurements
+on one of
+our large machines show
+an average of 60 context switches and interrupts
+per second and an average of 90 system calls per second.
+.PP
+If the system is heavily loaded, or if you have little memory
+for your load (1 megabyte is little in our environment), then the system
+may be forced to swap. This is likely to be accompanied by a noticeable
+reduction in the system responsiveness and long pauses when interactive
+jobs such as editors swap out.
+.PP
+A second important program is \fIiostat\fP\|(1).
+\fIIostat\fP
+iteratively reports the number of characters read and written to terminals,
+and, for each disk, the number of transfers per second, kilobytes
+transferred per second,
+and the milliseconds per average seek.
+It also gives the percentage of time the system has
+spent in user mode, in user mode running low priority (niced) processes,
+in system mode, and idling.
+.PP
+To compute this information, for each disk, seeks and data transfer completions
+and the number of words transferred are counted;
+for terminals collectively, the number
+of input and output characters are counted.
+Also, every 100 ms,
+the state of each disk is examined
+and a tally is made if the disk is active.
+From these numbers and the transfer rates
+of the devices it is possible to determine
+average seek times for each device.
+.PP
+When filesystems are poorly placed on the available
+disks, figures reported by \fIiostat\fP can be used
+to pinpoint bottlenecks. Under heavy system load, disk
+traffic should be spread out among the drives with
+higher traffic expected to the devices where the root, swap, and
+/tmp filesystems are located. When multiple disk drives are
+attached to the same controller, the system will
+attempt to overlap seek operations with I/O transfers. When
+seeks are performed, \fIiostat\fP will show
+non-zero average seek times. Most modern disk drives should
+exhibit an average seek time of 25-35 ms.
+.PP
+Terminal traffic reported by \fIiostat\fP should be heavily
+output oriented unless terminal lines are being used for
+data transfer by programs such as \fIuucp\fP. Input and
+output rates are system specific. Screen editors
+such as \fIvi\fP and \fIemacs\fP tend to exhibit output/input
+ratios of anywhere from 5/1 to 8/1. On one of our largest
+systems, 88 terminal lines plus 32 pseudo terminals, we observed
+an average of 180 characters/second input and 450 characters/second
+output over 4 days of operation.
+.NH 2
+Kernel profiling
+.PP
+It is simple to build a 4.2BSD kernel that will automatically
+collect profiling information as it operates simply by specifying the
+.B \-p
+option to \fIconfig\fP\|(8) when configuring a kernel.
+The program counter sampling can be driven by the system clock,
+or by an alternate real time clock.
+The latter is highly recommended as use of the system clock results
+in statistical anomalies in accounting for
+the time spent in the kernel clock routine.
+.PP
+Once a profiling system has been booted, statistics gathering is
+handled by \fIkgmon\fP\|(8).
+\fIKgmon\fP allows profiling to be started and stopped
+and the internal state of the profiling buffers to be dumped.
+\fIKgmon\fP can also be used to reset the state of the internal
+buffers to allow multiple experiments to be run without
+rebooting the machine.
+.PP
+The profiling data is processed with \fIgprof\fP\|(1)
+to obtain information regarding the system's operation.
+Profiled systems maintain histograms of the kernel program counter,
+the number of invocations of each routine,
+and a dynamic call graph of the executing system.
+The postprocessing propagates the time spent in each
+routine along the arcs of the call graph.
+\fIGprof\fP then generates a listing for each routine in the kernel,
+sorted according to the time it uses
+including the time of its call graph descendents.
+Below each routine entry is shown its (direct) call graph children,
+and how their times are propagated to this routine.
+A similar display above the routine shows how this routine's time and the
+time of its descendents are propagated to its (direct) call graph parents.
+.PP
+A profiled system is about 5-10% larger in its text space because of
+the calls to count the subroutine invocations.
+When the system executes,
+the profiling data is stored in a buffer that is 1.2
+times the size of the text space.
+All the information is summarized in memory;
+it is not necessary to have a trace file
+being continuously dumped to disk.
+The overhead for running a profiled system varies;
+under normal load we see anywhere from 5-25%
+of the system time spent in the profiling code.
+Thus the system is noticeably slower than an unprofiled system,
+yet is not so bad that it cannot be used in a production environment.
+This is important since it allows us to gather data
+in a real environment rather than trying to
+devise synthetic work loads.
+.NH 2
+Kernel tracing
+.PP
+The kernel can be configured to trace certain operations by
+specifying ``options TRACE'' in the configuration file. This
+forces the inclusion of code that records the occurrence of
+events in \fItrace records\fP in a circular buffer in kernel
+memory. Events may be enabled/disabled selectively while the
+system is operating. Each trace record contains a time stamp
+(taken from the VAX hardware time of day clock register), an
+event identifier, and additional information that is interpreted
+according to the event type. Buffer cache operations, such as
+initiating a read, include
+the disk drive, block number, and transfer size in the trace record.
+Virtual memory operations, such as a pagein completing, include
+the virtual address and process id in the trace record. The circular
+buffer is normally configured to hold 256 16-byte trace records.\**
+.FS
+\** The standard trace facilities distributed with 4.2
+differ slightly from those described here. The time stamp in the
+distributed system is calculated from the kernel's time of day
+variable instead of the VAX hardware register, and the buffer cache
+trace points do not record the transfer size.
+.FE
+.PP
+Several user programs were written to sample and interpret the
+tracing information. One program runs in the background and
+periodically reads the circular buffer of trace records. The
+trace information is compressed, in some instances interpreted
+to generate additional information, and a summary is written to a
+file. In addition, the sampling program can also record
+information from other kernel data structures, such as those
+interpreted by the \fIvmstat\fP program. Data written out to
+a file is further buffered to minimize I/O load.
+.PP
+Once a trace log has been created, programs that compress
+and interpret the data may be run to generate graphs showing the
+data and relationships between traced events and
+system load.
+.PP
+The trace package was used mainly to investigate the operation of
+the file system buffer cache. The sampling program maintained a
+history of read-ahead blocks and used the trace information to
+calculate, for example, percentage of read-ahead blocks used.
+.NH 2
+Benchmark programs
+.PP
+Benchmark programs were used in two ways. First, a suite of
+programs was constructed to calculate the cost of certain basic
+system operations. Operations such as system call overhead and
+context switching time are critically important in evaluating the
+overall performance of a system. Because of the drastic changes in
+the system between 4.1BSD and 4.2BSD, it was important to verify that
+the overhead of these low level operations had not changed appreciably.
+.PP
+The second use of benchmarks was in exercising
+suspected bottlenecks.
+When we suspected a specific problem with the system,
+a small benchmark program was written to repeatedly use
+the facility.
+While these benchmarks are not useful as a general tool,
+they can give quick feedback on whether a hypothesized
+improvement is really having an effect.
+It is important to realize that the only real assurance
+that a change has a beneficial effect is through
+long term measurements of general timesharing.
+We have numerous examples where a benchmark program
+suggests vast improvements while the change
+in the long term system performance is negligible,
+and conversely examples in which the benchmark program runs more slowly,
+but the long term system performance improves significantly.
diff --git a/share/doc/papers/sysperf/3.t b/share/doc/papers/sysperf/3.t
new file mode 100644
index 0000000..832ad42
--- /dev/null
+++ b/share/doc/papers/sysperf/3.t
@@ -0,0 +1,694 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Results of our observations
+.NH
+Results of our observations
+.PP
+When 4.2BSD was first installed on several large timesharing systems
+the degradation in performance was significant.
+Informal measurements showed 4.2BSD providing 80% of the throughput
+of 4.1BSD (based on load averages observed under a normal timesharing load).
+Many of the initial problems found were because of programs that were
+not part of 4.1BSD. Using the techniques described in the previous
+section and standard process profiling several problems were identified.
+Later work concentrated on the operation of the kernel itself.
+In this section we discuss the problems uncovered; in the next
+section we describe the changes made to the system.
+.NH 2
+User programs
+.PP
+.NH 3
+Mail system
+.PP
+The mail system was the first culprit identified as a major
+contributor to the degradation in system performance.
+At Lucasfilm the mail system is heavily used
+on one machine, a VAX-11/780 with eight megabytes of memory.\**
+.FS
+\** During part of these observations the machine had only four
+megabytes of memory.
+.FE
+Message
+traffic is usually between users on the same machine and ranges from
+person-to-person telephone messages to per-organization distribution
+lists. After conversion to 4.2BSD, it was
+immediately noticed that mail to distribution lists of 20 or more people
+caused the system load to jump by anywhere from 3 to 6 points.
+The number of processes spawned by the \fIsendmail\fP program and
+the messages sent from \fIsendmail\fP to the system logging
+process, \fIsyslog\fP, generated significant load both from their
+execution and their interference with basic system operation. The
+number of context switches and disk transfers often doubled while
+\fIsendmail\fP operated; the system call rate jumped dramatically.
+System accounting information consistently
+showed \fIsendmail\fP as the top cpu user on the system.
+.NH 3
+Network servers
+.PP
+The network services provided in 4.2BSD add new capabilities to the system,
+but are not without cost. The system uses one daemon process to accept
+requests for each network service provided. The presence of many
+such daemons increases the numbers of active processes and files,
+and requires a larger configuration to support the same number of users.
+The overhead of the routing and status updates can consume
+several percent of the cpu.
+Remote logins and shells incur more overhead
+than their local equivalents.
+For example, a remote login uses three processes and a
+pseudo-terminal handler in addition to the local hardware terminal
+handler. When using a screen editor, sending and echoing a single
+character involves four processes on two machines.
+The additional processes, context switching, network traffic, and
+terminal handler overhead can roughly triple the load presented by one
+local terminal user.
+.NH 2
+System overhead
+.PP
+To measure the costs of various functions in the kernel,
+a profiling system was run for a 17 hour
+period on one of our general timesharing machines.
+While this is not as reproducible as a synthetic workload,
+it certainly represents a realistic test.
+This test was run on several occasions over a three month period.
+Despite the long period of time that elapsed
+between the test runs the shapes of the profiles,
+as measured by the number of times each system call
+entry point was called, were remarkably similar.
+.PP
+These profiles turned up several bottlenecks that are
+discussed in the next section.
+Several of these were new to 4.2BSD,
+but most were caused by overloading of mechanisms
+which worked acceptably well in previous BSD systems.
+The general conclusion from our measurements was that
+the ratio of system to user time had increased from
+45% system / 55% user in 4.1BSD to 57% system / 43% user
+in 4.2BSD.
+.NH 3
+Micro-operation benchmarks
+.PP
+To compare certain basic system operations
+between 4.1BSD and 4.2BSD a suite of benchmark
+programs was constructed and run on a VAX-11/750 with 4.5 megabytes
+of physical memory and two disks on a MASSBUS controller.
+Tests were run with the machine operating in single user mode
+under both 4.1BSD and 4.2BSD. Paging was localized to the drive
+where the root file system was located.
+.PP
+The benchmark programs were modeled after the Kashtan benchmarks,
+[Kashtan80], with identical sources compiled under each system.
+The programs and their intended purpose are described briefly
+before the presentation of the results. The benchmark scripts
+were run twice with the results shown as the average of
+the two runs.
+The source code for each program and the shell scripts used during
+the benchmarks are included in the Appendix.
+.PP
+The set of tests shown in Table 1 was concerned with
+system operations other than paging. The intent of most
+benchmarks is clear. The result of running \fIsignocsw\fP is
+deducted from the \fIcsw\fP benchmark to calculate the context
+switch overhead. The \fIexec\fP tests use two different jobs to gauge
+the cost of overlaying a larger program with a smaller one
+and vice versa. The
+``null job'' and ``big job'' differ solely in the size of their data
+segments, 1 kilobyte versus 256 kilobytes. In both cases the
+text segment of the parent is larger than that of the child.\**
+.FS
+\** These tests should also have measured the cost of expanding the
+text segment; unfortunately time did not permit running additional tests.
+.FE
+All programs were compiled into the default load format that causes
+the text segment to be demand paged out of the file system and shared
+between processes.
+.KF
+.DS L
+.TS
+center box;
+l | l.
+Test Description
+_
+syscall perform 100,000 \fIgetpid\fP system calls
+csw perform 10,000 context switches using signals
+signocsw send 10,000 signals to yourself
+pipeself4 send 10,000 4-byte messages to yourself
+pipeself512 send 10,000 512-byte messages to yourself
+pipediscard4 send 10,000 4-byte messages to child who discards
+pipediscard512 send 10,000 512-byte messages to child who discards
+pipeback4 exchange 10,000 4-byte messages with child
+pipeback512 exchange 10,000 512-byte messages with child
+forks0 fork-exit-wait 1,000 times
+forks1k sbrk(1024), fault page, fork-exit-wait 1,000 times
+forks100k sbrk(102400), fault pages, fork-exit-wait 1,000 times
+vforks0 vfork-exit-wait 1,000 times
+vforks1k sbrk(1024), fault page, vfork-exit-wait 1,000 times
+vforks100k sbrk(102400), fault pages, vfork-exit-wait 1,000 times
+execs0null fork-exec ``null job''-exit-wait 1,000 times
+execs0null (1K env) execs0null above, with 1K environment added
+execs1knull sbrk(1024), fault page, fork-exec ``null job''-exit-wait 1,000 times
+execs1knull (1K env) execs1knull above, with 1K environment added
+execs100knull sbrk(102400), fault pages, fork-exec ``null job''-exit-wait 1,000 times
+vexecs0null vfork-exec ``null job''-exit-wait 1,000 times
+vexecs1knull sbrk(1024), fault page, vfork-exec ``null job''-exit-wait 1,000 times
+vexecs100knull sbrk(102400), fault pages, vfork-exec ``null job''-exit-wait 1,000 times
+execs0big fork-exec ``big job''-exit-wait 1,000 times
+execs1kbig sbrk(1024), fault page, fork-exec ``big job''-exit-wait 1,000 times
+execs100kbig sbrk(102400), fault pages, fork-exec ``big job''-exit-wait 1,000 times
+vexecs0big vfork-exec ``big job''-exit-wait 1,000 times
+vexecs1kbig sbrk(1024), fault pages, vfork-exec ``big job''-exit-wait 1,000 times
+vexecs100kbig sbrk(102400), fault pages, vfork-exec ``big job''-exit-wait 1,000 times
+.TE
+.ce
+Table 1. Kernel Benchmark programs.
+.DE
+.KE
+.PP
+The results of these tests are shown in Table 2. If the 4.1BSD results
+are scaled to reflect their being run on a VAX-11/750, they
+correspond closely to those found in [Joy80].\**
+.FS
+\** We assume that a VAX-11/750 runs at 60% of the speed of a VAX-11/780
+(not considering floating point operations).
+.FE
+.KF
+.DS L
+.TS
+center box;
+c s s s s s s s s s
+c || c s s || c s s || c s s
+c || c s s || c s s || c s s
+c || c | c | c || c | c | c || c | c | c
+l || n | n | n || n | n | n || n | n | n.
+Berkeley Software Distribution UNIX Systems
+_
+Test Elapsed Time User Time System Time
+\^ _ _ _
+\^ 4.1 4.2 4.3 4.1 4.2 4.3 4.1 4.2 4.3
+=
+syscall 28.0 29.0 23.0 4.5 5.3 3.5 23.9 23.7 20.4
+csw 45.0 60.0 45.0 3.5 4.3 3.3 19.5 25.4 19.0
+signocsw 16.5 23.0 16.0 1.9 3.0 1.1 14.6 20.1 15.2
+pipeself4 21.5 29.0 26.0 1.1 1.1 0.8 20.1 28.0 25.6
+pipeself512 47.5 59.0 55.0 1.2 1.2 1.0 46.1 58.3 54.2
+pipediscard4 32.0 42.0 36.0 3.2 3.7 3.0 15.5 18.8 15.6
+pipediscard512 61.0 76.0 69.0 3.1 2.1 2.0 29.7 36.4 33.2
+pipeback4 57.0 75.0 66.0 2.9 3.2 3.3 25.1 34.2 29.7
+pipeback512 110.0 138.0 125.0 3.1 3.4 2.2 52.2 65.7 57.7
+forks0 37.5 41.0 22.0 0.5 0.3 0.3 34.5 37.6 21.5
+forks1k 40.0 43.0 22.0 0.4 0.3 0.3 36.0 38.8 21.6
+forks100k 217.5 223.0 176.0 0.7 0.6 0.4 214.3 218.4 175.2
+vforks0 34.5 37.0 22.0 0.5 0.6 0.5 27.3 28.5 17.9
+vforks1k 35.0 37.0 22.0 0.6 0.8 0.5 27.2 28.6 17.9
+vforks100k 35.0 37.0 22.0 0.6 0.8 0.6 27.6 28.9 17.9
+execs0null 97.5 92.0 66.0 3.8 2.4 0.6 68.7 82.5 48.6
+execs0null (1K env) 197.0 229.0 75.0 4.1 2.6 0.9 167.8 212.3 62.6
+execs1knull 99.0 100.0 66.0 4.1 1.9 0.6 70.5 86.8 48.7
+execs1knull (1K env) 199.0 230.0 75.0 4.2 2.6 0.7 170.4 214.9 62.7
+execs100knull 283.5 278.0 216.0 4.8 2.8 1.1 251.9 269.3 202.0
+vexecs0null 100.0 92.0 66.0 5.1 2.7 1.1 63.7 76.8 45.1
+vexecs1knull 100.0 91.0 66.0 5.2 2.8 1.1 63.2 77.1 45.1
+vexecs100knull 100.0 92.0 66.0 5.1 3.0 1.1 64.0 77.7 45.6
+execs0big 129.0 201.0 101.0 4.0 3.0 1.0 102.6 153.5 92.7
+execs1kbig 130.0 202.0 101.0 3.7 3.0 1.0 104.7 155.5 93.0
+execs100kbig 318.0 385.0 263.0 4.8 3.1 1.1 286.6 339.1 247.9
+vexecs0big 128.0 200.0 101.0 4.6 3.5 1.6 98.5 149.6 90.4
+vexecs1kbig 125.0 200.0 101.0 4.7 3.5 1.3 98.9 149.3 88.6
+vexecs100kbig 126.0 200.0 101.0 4.2 3.4 1.3 99.5 151.0 89.0
+.TE
+.ce
+Table 2. Kernel Benchmark results (all times in seconds).
+.DE
+.KE
+.PP
+In studying the measurements we found that the basic system call
+and context switch overhead did not change significantly
+between 4.1BSD and 4.2BSD. The \fIsignocsw\fP results were caused by
+the changes to the \fIsignal\fP interface, resulting
+in an additional subroutine invocation for each call, not
+to mention additional complexity in the system's implementation.
+.PP
+The times for the use of pipes are significantly higher under
+4.2BSD because of their implementation on top of the interprocess
+communication facilities. Under 4.1BSD pipes were implemented
+without the complexity of the socket data structures and with
+simpler code. Further, while not obviously a factor here,
+4.2BSD pipes have less system buffer space provided them than
+4.1BSD pipes.
+.PP
+The \fIexec\fP tests shown in Table 2 were performed with 34 bytes of
+environment information under 4.1BSD and 40 bytes under 4.2BSD.
+To figure the cost of passing data through the environment,
+the execs0null and execs1knull tests were rerun with
+1065 additional bytes of data. The results are shown in Table 3.
+.KF
+.DS L
+.TS
+center box;
+c || c s || c s || c s
+c || c s || c s || c s
+c || c | c || c | c || c | c
+l || n | n || n | n || n | n.
+Test Real User System
+\^ _ _ _
+\^ 4.1 4.2 4.1 4.2 4.1 4.2
+=
+execs0null 197.0 229.0 4.1 2.6 167.8 212.3
+execs1knull 199.0 230.0 4.2 2.6 170.4 214.9
+.TE
+.ce
+Table 3. Benchmark results with ``large'' environment (all times in seconds).
+.DE
+.KE
+These results show that passing argument data is significantly
+slower than under 4.1BSD: 121 \(*ms/byte versus 93 \(*ms/byte. Even using
+this factor to adjust the basic overhead of an \fIexec\fP system
+call, this facility is more costly under 4.2BSD than under 4.1BSD.
+.NH 3
+Path name translation
+.PP
+The single most expensive function performed by the kernel
+is path name translation.
+This has been true in almost every UNIX kernel [Mosher80];
+we find that our general time sharing systems do about
+500,000 name translations per day.
+.PP
+Name translations became more expensive in 4.2BSD for several reasons.
+The single most expensive addition was the symbolic link.
+Symbolic links
+have the effect of increasing the average number of components
+in path names to be translated.
+As an insidious example,
+consider the system manager that decides to change /tmp
+to be a symbolic link to /usr/tmp.
+A name such as /tmp/tmp1234 that previously required two component
+translations,
+now requires four component translations plus the cost of reading
+the contents of the symbolic link.
+.PP
+The new directory format also changes the characteristics of
+name translation.
+The more complex format requires more computation to determine
+where to place new entries in a directory.
+Conversely the additional information allows the system to only
+look at active entries when searching,
+hence searches of directories that had once grown large
+but currently have few active entries are checked quickly.
+The new format also stores the length of each name so that
+costly string comparisons are only done on names that are the
+same length as the name being sought.
+.PP
+The net effect of the changes is that the average time to
+translate a path name in 4.2BSD is 24.2 milliseconds,
+representing 40% of the time processing system calls,
+that is 19% of the total cycles in the kernel,
+or 11% of all cycles executed on the machine.
+The times are shown in Table 4. We have no comparable times
+for \fInamei\fP under 4.1 though they are certain to
+be significantly less.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part time % of kernel
+_
+self 14.3 ms/call 11.3%
+child 9.9 ms/call 7.9%
+_
+total 24.2 ms/call 19.2%
+.TE
+.ce
+Table 4. Call times for \fInamei\fP in 4.2BSD.
+.DE
+.KE
+.NH 3
+Clock processing
+.PP
+Nearly 25% of the time spent in the kernel is spent in the clock
+processing routines.
+(This is a clear indication that to avoid sampling bias when profiling the
+kernel with our tools
+we need to drive them from an independent clock.)
+These routines are responsible for implementing timeouts,
+scheduling the processor,
+maintaining kernel statistics,
+and tending various hardware operations such as
+draining the terminal input silos.
+Only minimal work is done in the hardware clock interrupt
+routine (at high priority); the rest is performed (at a lower priority)
+in a software interrupt handler scheduled by the hardware interrupt
+handler.
+In the worst case, with a clock rate of 100 Hz
+and with every hardware interrupt scheduling a software
+interrupt, the processor must field 200 interrupts per second.
+The overhead of simply trapping and returning
+is 3% of the machine cycles;
+figuring out that there is nothing to do
+requires an additional 2%.
+.NH 3
+Terminal multiplexors
+.PP
+The terminal multiplexors supported by 4.2BSD have programmable receiver
+silos that may be used in two ways.
+With the silo disabled, each character received causes an interrupt
+to the processor.
+Enabling the receiver silo allows the silo to fill before
+generating an interrupt, allowing multiple characters to be read
+for each interrupt.
+At low rates of input, received characters will not be processed
+for some time unless the silo is emptied periodically.
+The 4.2BSD kernel uses the input silos of each terminal multiplexor,
+and empties each silo on each clock interrupt.
+This allows high input rates without the cost of per-character interrupts
+while assuring low latency.
+However, as character input rates on most machines are usually
+low (about 25 characters per second),
+this can result in excessive overhead.
+At the current clock rate of 100 Hz, a machine with 5 terminal multiplexors
+configured makes 500 calls to the receiver interrupt routines per second.
+In addition, to achieve acceptable input latency
+for flow control, each clock interrupt must schedule
+a software interrupt to run the silo draining routines.\**
+.FS
+\** It is not possible to check the input silos at
+the time of the actual clock interrupt without modifying the terminal
+line disciplines, as the input queues may not be in a consistent state.
+.FE
+This implies that the worst case estimate for clock processing
+is the basic overhead for clock processing.
+.NH 3
+Process table management
+.PP
+In 4.2BSD there are numerous places in the kernel where a linear search
+of the process table is performed:
+.IP \(bu 3
+in \fIexit\fP to locate and wakeup a process's parent;
+.IP \(bu 3
+in \fIwait\fP when searching for \fB\s-2ZOMBIE\s+2\fP and
+\fB\s-2STOPPED\s+2\fP processes;
+.IP \(bu 3
+in \fIfork\fP when allocating a new process table slot and
+counting the number of processes already created by a user;
+.IP \(bu 3
+in \fInewproc\fP, to verify
+that a process id assigned to a new process is not currently
+in use;
+.IP \(bu 3
+in \fIkill\fP and \fIgsignal\fP to locate all processes to
+which a signal should be delivered;
+.IP \(bu 3
+in \fIschedcpu\fP when adjusting the process priorities every
+second; and
+.IP \(bu 3
+in \fIsched\fP when locating a process to swap out and/or swap
+in.
+.LP
+These linear searches can incur significant overhead. The rule
+for calculating the size of the process table is:
+.ce
+nproc = 20 + 8 * maxusers
+.sp
+that means a 48 user system will have a 404 slot process table.
+With the addition of network services in 4.2BSD, as many as a dozen
+server processes may be maintained simply to await incoming requests.
+These servers are normally created at boot time which causes them
+to be allocated slots near the beginning of the process table. This
+means that process table searches under 4.2BSD are likely to take
+significantly longer than under 4.1BSD. System profiling shows
+that as much as 20% of the time spent in the kernel on a loaded
+system (a VAX-11/780) can be spent in \fIschedcpu\fP and, on average,
+5-10% of the kernel time is spent in \fIschedcpu\fP.
+The other searches of the proc table are similarly affected.
+This shows the system can no longer tolerate using linear searches of
+the process table.
+.NH 3
+File system buffer cache
+.PP
+The trace facilities described in section 2.3 were used
+to gather statistics on the performance of the buffer cache.
+We were interested in measuring the effectiveness of the
+cache and the read-ahead policies.
+With the file system block size in 4.2BSD four to
+eight times that of a 4.1BSD file system, we were concerned
+that large amounts of read-ahead might be performed without
+being used. Also, we were interested in seeing if the
+rules used to size the buffer cache at boot time were severely
+affecting the overall cache operation.
+.PP
+The tracing package was run over a three hour period during
+a peak mid-afternoon period on a VAX 11/780 with four megabytes
+of physical memory.
+This resulted in a buffer cache containing 400 kilobytes of memory
+spread among 50 to 200 buffers
+(the actual number of buffers depends on the size mix of
+disk blocks being read at any given time).
+The pertinent configuration information is shown in Table 5.
+.KF
+.DS L
+.TS
+center box;
+l l l l.
+Controller Drive Device File System
+_
+DEC MASSBUS DEC RP06 hp0d /usr
+ hp0b swap
+Emulex SC780 Fujitsu Eagle hp1a /usr/spool/news
+ hp1b swap
+ hp1e /usr/src
+ hp1d /u0 (users)
+ Fujitsu Eagle hp2a /tmp
+ hp2b swap
+ hp2d /u1 (users)
+ Fujitsu Eagle hp3a /
+.TE
+.ce
+Table 5. Active file systems during buffer cache tests.
+.DE
+.KE
+.PP
+During the test period the load average ranged from 2 to 13
+with an average of 5.
+The system had no idle time, 43% user time, and 57% system time.
+The system averaged 90 interrupts per second
+(excluding the system clock interrupts),
+220 system calls per second,
+and 50 context switches per second (40 voluntary, 10 involuntary).
+.PP
+The active virtual memory (the sum of the address space sizes of
+all jobs that have run in the previous twenty seconds)
+over the period ranged from 2 to 6 megabytes with an average
+of 3.5 megabytes.
+There was no swapping, though the page daemon was inspecting
+about 25 pages per second.
+.PP
+On average 250 requests to read disk blocks were initiated
+per second.
+These include read requests for file blocks made by user
+programs as well as requests initiated by the system.
+System reads include requests for indexing information to determine
+where a file's next data block resides,
+file system layout maps to allocate new data blocks,
+and requests for directory contents needed to do path name translations.
+.PP
+On average, an 85% cache hit rate was observed for read requests.
+Thus only 37 disk reads were initiated per second.
+In addition, 5 read-ahead requests were made each second
+filling about 20% of the buffer pool.
+Despite the policies to rapidly reuse read-ahead buffers
+that remain unclaimed, more than 90% of the read-ahead
+buffers were used.
+.PP
+These measurements showed that the buffer cache was working
+effectively. Independent tests have also shown that the size
+of the buffer cache may be reduced significantly on memory-poor
+systems without severe effects;
+we have not yet tested this hypothesis [Shannon83].
+.NH 3
+Network subsystem
+.PP
+The overhead associated with the
+network facilities found in 4.2BSD is often
+difficult to gauge without profiling the system.
+This is because most input processing is performed
+in modules scheduled with software interrupts.
+As a result, the system time spent performing protocol
+processing is rarely attributed to the processes that
+really receive the data. Since the protocols supported
+by 4.2BSD can involve significant overhead this was a serious
+concern. Results from a profiled kernel show an average
+of 5% of the system time is spent
+performing network input and timer processing in our environment
+(a 3Mb/s Ethernet with most traffic using TCP).
+This figure can vary significantly depending on
+the network hardware used, the average message
+size, and whether packet reassembly is required at the network
+layer. On one machine we profiled over a 17 hour
+period (our gateway to the ARPANET)
+206,000 input messages accounted for 2.4% of the system time,
+while another 0.6% of the system time was spent performing
+protocol timer processing.
+This machine was configured with an ACC LH/DH IMP interface
+and a DMA 3Mb/s Ethernet controller.
+.PP
+The performance of TCP over slower long-haul networks
+was degraded substantially by two problems.
+The first problem was a bug that prevented round-trip timing measurements
+from being made, thus increasing retransmissions unnecessarily.
+The second was a problem with the maximum segment size chosen by TCP,
+that was well-tuned for Ethernet, but was poorly chosen for
+the ARPANET, where it causes packet fragmentation. (The maximum
+segment size was actually negotiated upwards to a value that
+resulted in excessive fragmentation.)
+.PP
+When benchmarked in Ethernet environments the main memory buffer management
+of the network subsystem presented some performance anomalies.
+The overhead of processing small ``mbufs'' severely affected throughput for a
+substantial range of message sizes.
+In spite of the fact that most system utilities made use of the throughput
+optimal 1024 byte size, user processes faced large degradations for some
+arbitrary sizes. This was especially true for TCP/IP transmissions [Cabrera84,
+Cabrera85].
+.NH 3
+Virtual memory subsystem
+.PP
+We ran a set of tests intended to exercise the virtual
+memory system under both 4.1BSD and 4.2BSD.
+The tests are described in Table 6.
+The test programs dynamically allocated
+a 7.3 Megabyte array (using \fIsbrk\fP\|(2)) then referenced
+pages in the array either: sequentially, in a purely random
+fashion, or such that the distance between
+successive pages accessed was randomly selected from a Gaussian
+distribution. In the last case, successive runs were made with
+increasing standard deviations.
+.KF
+.DS L
+.TS
+center box;
+l | l.
+Test Description
+_
+seqpage sequentially touch pages, 10 iterations
+seqpage-v as above, but first make \fIvadvise\fP\|(2) call
+randpage touch random page 30,000 times
+randpage-v as above, but first make \fIvadvise\fP call
+gausspage.1 30,000 Gaussian accesses, standard deviation of 1
+gausspage.10 as above, standard deviation of 10
+gausspage.30 as above, standard deviation of 30
+gausspage.40 as above, standard deviation of 40
+gausspage.50 as above, standard deviation of 50
+gausspage.60 as above, standard deviation of 60
+gausspage.80 as above, standard deviation of 80
+gausspage.inf as above, standard deviation of 10,000
+.TE
+.ce
+Table 6. Paging benchmark programs.
+.DE
+.KE
+.PP
+The results in Table 7 show how the additional
+memory requirements
+of 4.2BSD can generate more work for the paging system.
+Under 4.1BSD,
+the system used 0.5 of the 4.5 megabytes of physical memory
+on the test machine;
+under 4.2BSD it used nearly 1 megabyte of physical memory.\**
+.FS
+\** The 4.1BSD system used for testing was really a 4.1a
+system configured
+with networking facilities and code to support
+remote file access. The
+4.2BSD system also included the remote file access code.
+Since both
+systems would be larger than similarly configured ``vanilla''
+4.1BSD or 4.2BSD systems, we consider our conclusions to still be valid.
+.FE
+This resulted in more page faults and, hence, more system time.
+To establish a common ground on which to compare the paging
+routines of each system, we check instead the average page fault
+service times for those test runs that had a statistically significant
+number of random page faults. These figures, shown in Table 8, show
+no significant difference between the two systems in
+the area of page fault servicing. We currently have
+no explanation for the results of the sequential
+paging tests.
+.KF
+.DS L
+.TS
+center box;
+l || c s || c s || c s || c s
+l || c s || c s || c s || c s
+l || c | c || c | c || c | c || c | c
+l || n | n || n | n || n | n || n | n.
+Test Real User System Page Faults
+\^ _ _ _ _
+\^ 4.1 4.2 4.1 4.2 4.1 4.2 4.1 4.2
+=
+seqpage 959 1126 16.7 12.8 197.0 213.0 17132 17113
+seqpage-v 579 812 3.8 5.3 216.0 237.7 8394 8351
+randpage 571 569 6.7 7.6 64.0 77.2 8085 9776
+randpage-v 572 562 6.1 7.3 62.2 77.5 8126 9852
+gausspage.1 25 24 23.6 23.8 0.8 0.8 8 8
+gausspage.10 26 26 22.7 23.0 3.2 3.6 2 2
+gausspage.30 34 33 25.0 24.8 8.6 8.9 2 2
+gausspage.40 42 81 23.9 25.0 11.5 13.6 3 260
+gausspage.50 113 175 24.2 26.2 19.6 26.3 784 1851
+gausspage.60 191 234 27.6 26.7 27.4 36.0 2067 3177
+gausspage.80 312 329 28.0 27.9 41.5 52.0 3933 5105
+gausspage.inf 619 621 82.9 85.6 68.3 81.5 8046 9650
+.TE
+.ce
+Table 7. Paging benchmark results (all times in seconds).
+.DE
+.KE
+.KF
+.DS L
+.TS
+center box;
+c || c s || c s
+c || c s || c s
+c || c | c || c | c
+l || n | n || n | n.
+Test Page Faults PFST
+\^ _ _
+\^ 4.1 4.2 4.1 4.2
+=
+randpage 8085 9776 791 789
+randpage-v 8126 9852 765 786
+gausspage.inf 8046 9650 848 844
+.TE
+.ce
+Table 8. Page fault service times (all times in microseconds).
+.DE
+.KE
diff --git a/share/doc/papers/sysperf/4.t b/share/doc/papers/sysperf/4.t
new file mode 100644
index 0000000..cdbfb14
--- /dev/null
+++ b/share/doc/papers/sysperf/4.t
@@ -0,0 +1,774 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Performance Improvements
+.NH
+Performance Improvements
+.PP
+This section outlines the changes made to the system
+since the 4.2BSD distribution.
+The changes reported here were made in response
+to the problems described in Section 3.
+The improvements fall into two major classes;
+changes to the kernel that are described in this section,
+and changes to the system libraries and utilities that are
+described in the following section.
+.NH 2
+Performance Improvements in the Kernel
+.PP
+Our goal has been to optimize system performance
+for our general timesharing environment.
+Since most sites running 4.2BSD have been forced to take
+advantage of declining
+memory costs rather than replace their existing machines with
+ones that are more powerful, we have
+chosen to optimize running time at the expense of memory.
+This tradeoff may need to be reconsidered for personal workstations
+that have smaller memories and higher latency disks.
+Decreases in the running time of the system may be unnoticeable
+because of higher paging rates incurred by a larger kernel.
+Where possible, we have allowed the size of caches to be controlled
+so that systems with limited memory may reduce them as appropriate.
+.NH 3
+Name Cacheing
+.PP
+Our initial profiling studies showed that more than one quarter
+of the time in the system was spent in the
+pathname translation routine, \fInamei\fP,
+translating path names to inodes\u\s-21\s0\d\**.
+.FS
+\** \u\s-21\s0\d Inode is an abbreviation for ``Index node''.
+Each file on the system is described by an inode;
+the inode maintains access permissions, and an array of pointers to
+the disk blocks that hold the data associated with the file.
+.FE
+An inspection of \fInamei\fP shows that
+it consists of two nested loops.
+The outer loop is traversed once per pathname component.
+The inner loop performs a linear search through a directory looking
+for a particular pathname component.
+.PP
+Our first idea was to reduce the number of iterations
+around the inner loop of \fInamei\fP by observing that many programs
+step through a directory performing an operation on each entry in turn.
+To improve performance for processes doing directory scans,
+the system keeps track of the directory offset of the last component of the
+most recently translated path name for each process.
+If the next name the process requests is in the same directory,
+the search is started from the offset at which the previous name was found
+(instead of from the beginning of the directory).
+Changing directories invalidates the cache, as
+does modifying the directory.
+For programs that step sequentially through a directory with
+.EQ
+delim $$
+.EN
+$N$ files, search time decreases from $O ( N sup 2 )$ to $O(N)$.
+.EQ
+delim off
+.EN
+.PP
+The cost of the cache is about 20 lines of code
+(about 0.2 kilobytes)
+and 16 bytes per process, with the cached data
+stored in a process's \fIuser\fP vector.
+.PP
+As a quick benchmark to verify the maximum effectiveness of the
+cache we ran ``ls \-l''
+on a directory containing 600 files.
+Before the per-process cache this command
+used 22.3 seconds of system time.
+After adding the cache the program used the same amount
+of user time, but the system time dropped to 3.3 seconds.
+.PP
+This change prompted our rerunning a profiled system
+on a machine containing the new \fInamei\fP.
+The results showed that the time in \fInamei\fP
+dropped by only 2.6 ms/call and
+still accounted for 36% of the system call time,
+18% of the kernel, or about 10% of all the machine cycles.
+This amounted to a drop in system time from 57% to about 55%.
+The results are shown in Table 9.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part time % of kernel
+_
+self 11.0 ms/call 9.2%
+child 10.6 ms/call 8.9%
+_
+total 21.6 ms/call 18.1%
+.TE
+.ce
+Table 9. Call times for \fInamei\fP with per-process cache.
+.DE
+.KE
+.PP
+The small performance improvement
+was caused by a low cache hit ratio.
+Although the cache was 90% effective when hit,
+it was only usable on about 25% of the names being translated.
+An additional reason for the small improvement was that
+although the amount of time spent in \fInamei\fP itself
+decreased substantially,
+more time was spent in the routines that it called
+since each directory had to be accessed twice;
+once to search from the middle to the end,
+and once to search from the beginning to the middle.
+.PP
+Frequent requests for a small set of names are best handled
+with a cache of recent name translations\**.
+.FS
+\** The cache is keyed on a name and the
+inode and device number of the directory that contains it.
+Associated with each entry is a pointer to the corresponding
+entry in the inode table.
+.FE
+This has the effect of eliminating the inner loop of \fInamei\fP.
+For each path name component,
+\fInamei\fP first looks in its cache of recent translations
+for the needed name.
+If it exists, the directory search can be completely eliminated.
+.PP
+The system already maintained a cache of recently accessed inodes,
+so the initial name cache
+maintained a simple name-inode association that was used to
+check each component of a path name during name translations.
+We considered implementing the cache by tagging each inode
+with its most recently translated name,
+but eventually decided to have a separate data structure that
+kept names with pointers to the inode table.
+Tagging inodes has two drawbacks;
+many inodes such as those associated with login ports remain in
+the inode table for a long period of time, but are never looked
+up by name.
+Other inodes, such as those describing directories are looked up
+frequently by many different names (\fIe.g.\fP ``..'').
+By keeping a separate table of names, the cache can
+truly reflect the most recently used names.
+An added benefit is that the table can be sized independently
+of the inode table, so that machines with small amounts of memory
+can reduce the size of the cache (or even eliminate it)
+without modifying the inode table structure.
+.PP
+Another issue to be considered is how the name cache should
+hold references to the inode table.
+Normally processes hold ``hard references'' by incrementing the
+reference count in the inode they reference.
+Since the system reuses only inodes with zero reference counts,
+a hard reference ensures that the inode pointer will remain valid.
+However, if the name cache holds hard references,
+it is limited to some fraction of the size of the inode table,
+since some inodes must be left free for new files.
+It also makes it impossible for other parts of the kernel
+to verify sole use of a device or file.
+These reasons made it impractical to use hard references
+without affecting the behavior of the inode cacheing scheme.
+Thus, we chose instead to keep ``soft references'' protected
+by a \fIcapability\fP \- a 32-bit number
+guaranteed to be unique\u\s-22\s0\d \**.
+.FS
+\** \u\s-22\s0\d When all the numbers have been exhausted, all outstanding
+capabilities are purged and numbering starts over from scratch.
+Purging is possible as all capabilities are easily found in kernel memory.
+.FE
+When an entry is made in the name cache,
+the capability of its inode is copied to the name cache entry.
+When an inode is reused it is issued a new capability.
+When a name cache hit occurs,
+the capability of the name cache entry is compared
+with the capability of the inode that it references.
+If the capabilities do not match, the name cache entry is invalid.
+Since the name cache holds only soft references,
+it may be sized independently of the size of the inode table.
+A final benefit of using capabilities is that all
+cached names for an inode may be invalidated without
+searching through the entire cache;
+instead all you need to do is assign a new capability to the inode.
+.PP
+The cost of the name cache is about 200 lines of code
+(about 1.2 kilobytes)
+and 48 bytes per cache entry.
+Depending on the size of the system,
+about 200 to 1000 entries will normally be configured,
+using 10-50 kilobytes of physical memory.
+The name cache is resident in memory at all times.
+.PP
+After adding the system wide name cache we reran ``ls \-l''
+on the same directory.
+The user time remained the same;
+however, the system time rose slightly to 3.7 seconds.
+This was not surprising as \fInamei\fP
+now had to maintain the cache,
+but was never able to make any use of it.
+.PP
+Another profiled system was created and measurements
+were collected over a 17 hour period. These measurements
+showed a 13 ms/call decrease in \fInamei\fP, with
+\fInamei\fP accounting for only 26% of the system call time,
+13% of the time in the kernel,
+or about 7% of all the machine cycles.
+System time dropped from 55% to about 49%.
+The results are shown in Table 10.
+.KF
+.DS L
+.TS
+center box;
+l r r.
+part time % of kernel
+_
+self 4.2 ms/call 6.2%
+child 4.4 ms/call 6.6%
+_
+total 8.6 ms/call 12.8%
+.TE
+.ce
+Table 10. Call times for \fInamei\fP with both caches.
+.DE
+.KE
+.PP
+On our general time sharing systems we find that during the twelve
+hour period from 8AM to 8PM the system does 500,000 to 1,000,000
+name translations.
+Statistics on the performance of both caches show that
+the large performance improvement is
+caused by the high hit ratio.
+The name cache has a hit rate of 70%-80%;
+the directory offset cache gets a hit rate of 5%-15%.
+The combined hit rate of the two caches almost always adds up to 85%.
+With the addition of the two caches,
+the percentage of system time devoted to name translation has
+dropped from 25% to less than 13%.
+While the system wide cache reduces both the amount of time in
+the routines that \fInamei\fP calls as well as \fInamei\fP itself
+(since fewer directories need to be accessed or searched),
+it is interesting to note that the actual percentage of system
+time spent in \fInamei\fP itself increases even though the
+actual time per call decreases.
+This is because less total time is being spent in the kernel,
+hence a smaller absolute time becomes a larger total percentage.
+.NH 3
+Intelligent Auto Siloing
+.PP
+Most terminal input hardware can run in two modes:
+it can either generate an interrupt each time a character is received,
+or collect characters in a silo that the system then periodically drains.
+To provide quick response for interactive input and flow control,
+a silo must be checked 30 to 50 times per second.
+Ascii terminals normally exhibit
+an input rate of less than 30 characters per second.
+At this input rate
+they are most efficiently handled with interrupt per character mode,
+since this generates fewer interrupts than draining the input silos
+of the terminal multiplexors at each clock interrupt.
+When input is being generated by another machine
+or a malfunctioning terminal connection, however,
+the input rate is usually more than 50 characters per second.
+It is more efficient to use a device's silo input mode,
+since this generates fewer interrupts than handling each character
+as a separate interrupt.
+Since a given dialup port may switch between uucp logins and user logins,
+it is impossible to statically select the most efficient input mode to use.
+.PP
+We therefore changed the terminal multiplexor handlers
+to dynamically choose between the use of the silo and the use of
+per-character interrupts.
+At low input rates the handler processes characters on an
+interrupt basis, avoiding the overhead
+of checking each interface on each clock interrupt.
+During periods of sustained input, the handler enables the silo
+and starts a timer to drain input.
+This timer runs less frequently than the clock interrupts,
+and is used only when there is a substantial amount of input.
+The transition from using silos to an interrupt per character is
+damped to minimize the number of transitions with bursty traffic
+(such as in network communication).
+Input characters serve to flush the silo, preventing long latency.
+By switching between these two modes of operation dynamically,
+the overhead of checking the silos is incurred only
+when necessary.
+.PP
+In addition to the savings in the terminal handlers,
+the clock interrupt routine is no longer required to schedule
+a software interrupt after each hardware interrupt to drain the silos.
+The software-interrupt level portion of the clock routine is only
+needed when timers expire or the current user process is collecting
+an execution profile.
+Thus, the number of interrupts attributable to clock processing
+is substantially reduced.
+.NH 3
+Process Table Management
+.PP
+As systems have grown larger, the size of the process table
+has grown far past 200 entries.
+With large tables, linear searches must be eliminated
+from any frequently used facility.
+The kernel process table is now multi-threaded to allow selective searching
+of active and zombie processes.
+A third list threads unused process table slots.
+Free slots can be obtained in constant time by taking one
+from the front of the free list.
+The number of processes used by a given user may be computed by scanning
+only the active list.
+Since the 4.2BSD release,
+the kernel has maintained linked lists of the descendents of each process.
+This linkage is now exploited when dealing with process exit;
+parents seeking the exit status of children now avoid linear search
+of the process table, but examine only their direct descendents.
+In addition, the previous algorithm for finding all descendents of an exiting
+process used multiple linear scans of the process table.
+This has been changed to follow the links between child process and siblings.
+.PP
+When forking a new process,
+the system must assign it a unique process identifier.
+The system previously scanned the entire process table each time it created
+a new process to locate an identifier that was not already in use.
+Now, to avoid scanning the process table for each new process,
+the system computes a range of unused identifiers
+that can be directly assigned.
+Only when the set of identifiers is exhausted is another process table
+scan required.
+.NH 3
+Scheduling
+.PP
+Previously the scheduler scanned the entire process table
+once per second to recompute process priorities.
+Processes that had run for their entire time slice had their
+priority lowered.
+Processes that had not used their time slice, or that had
+been sleeping for the past second had their priority raised.
+On systems running many processes,
+the scheduler represented nearly 20% of the system time.
+To reduce this overhead,
+the scheduler has been changed to consider only
+runnable processes when recomputing priorities.
+To ensure that processes sleeping for more than a second
+still get their appropriate priority boost,
+their priority is recomputed when they are placed back on the run queue.
+Since the set of runnable processes is typically only a small fraction
+of the total number of processes on the system,
+the cost of invoking the scheduler drops proportionally.
+.NH 3
+Clock Handling
+.PP
+The hardware clock interrupts the processor 100 times per second
+at high priority.
+As most of the clock-based events need not be done at high priority,
+the system schedules a lower priority software interrupt to do the less
+time-critical events such as cpu scheduling and timeout processing.
+Often there are no such events, and the software interrupt handler
+finds nothing to do and returns.
+The high priority event now checks to see if there are low priority
+events to process;
+if there is nothing to do, the software interrupt is not requested.
+Often, the high priority interrupt occurs during a period when the
+machine had been running at low priority.
+Rather than posting a software interrupt that would occur as
+soon as it returns,
+the hardware clock interrupt handler simply lowers the processor priority
+and calls the software clock routines directly.
+Between these two optimizations, nearly 80 of the 100 software
+interrupts per second can be eliminated.
+.NH 3
+File System
+.PP
+The file system uses a large block size, typically 4096 or 8192 bytes.
+To allow small files to be stored efficiently, the large blocks can
+be broken into smaller fragments, typically multiples of 1024 bytes.
+To minimize the number of full-sized blocks that must be broken
+into fragments, the file system uses a best fit strategy.
+Programs that slowly grow files using writes of 1024 bytes or less
+can force the file system to copy the data to
+successively larger and larger fragments until it finally
+grows to a full sized block.
+The file system still uses a best fit strategy the first time
+a fragment is written.
+However, the first time that the file system is forced to copy a growing
+fragment it places it at the beginning of a full sized block.
+Continued growth can be accommodated without further copying
+by using up the rest of the block.
+If the file ceases to grow, the rest of the block is still
+available for holding other fragments.
+.PP
+When creating a new file name,
+the entire directory in which it will reside must be scanned
+to ensure that the name does not already exist.
+For large directories, this scan is time consuming.
+Because there was no provision for shortening directories,
+a directory that is once over-filled will increase the cost
+of file creation even after the over-filling is corrected.
+Thus, for example, a congested uucp connection can leave a legacy long
+after it is cleared up.
+To alleviate the problem, the system now deletes empty blocks
+that it finds at the end of a directory while doing a complete
+scan to create a new name.
+.NH 3
+Network
+.PP
+The default amount of buffer space allocated for stream sockets (including
+pipes) has been increased to 4096 bytes.
+Stream sockets and pipes now return their buffer sizes in the block size field
+of the stat structure.
+This information allows the standard I/O library to use more optimal buffering.
+Unix domain stream sockets also return a dummy device and inode number
+in the stat structure to increase compatibility
+with other pipe implementations.
+The TCP maximum segment size is calculated according to the destination
+and interface in use; non-local connections use a more conservative size
+for long-haul networks.
+.PP
+On multiply-homed hosts, the local address bound by TCP now always corresponds
+to the interface that will be used in transmitting data packets for the
+connection.
+Several bugs in the calculation of round trip timing have been corrected.
+TCP now switches to an alternate gateway when an existing route fails,
+or when an ICMP redirect message is received.
+ICMP source quench messages are used to throttle the transmission
+rate of TCP streams by temporarily creating an artificially small
+send window, and retransmissions send only a single packet
+rather than resending all queued data.
+A send policy has been implemented
+that decreases the number of small packets outstanding
+for network terminal traffic [Nagle84],
+providing additional reduction of network congestion.
+The overhead of packet routing has been decreased by changes in the routing
+code and by cacheing the most recently used route for each datagram socket.
+.PP
+The buffer management strategy implemented by \fIsosend\fP has been
+changed to make better use of the increased size of the socket buffers
+and a better tuned delayed acknowledgement algorithm.
+Routing has been modified to include a one element cache of the last
+route computed.
+Multiple messages sent with the same destination now require less processing.
+Performance deteriorates because of load in
+either the sender host, receiver host, or ether.
+Also, any CPU contention degrades substantially
+the throughput achievable by user processes [Cabrera85].
+We have observed empty VAX 11/750s using up to 90% of their cycles
+transmitting network messages.
+.NH 3
+Exec
+.PP
+When \fIexec\fP-ing a new process, the kernel creates the new
+program's argument list by copying the arguments and environment
+from the parent process's address space into the system, then back out
+again onto the stack of the newly created process.
+These two copy operations were done one byte at a time, but
+are now done a string at a time.
+This optimization reduced the time to process
+an argument list by a factor of ten;
+the average time to do an \fIexec\fP call decreased by 25%.
+.NH 3
+Context Switching
+.PP
+The kernel used to post a software event when it wanted to force
+a process to be rescheduled.
+Often the process would be rescheduled for other reasons before
+exiting the kernel, delaying the event trap.
+At some later time the process would again
+be selected to run and would complete its pending system call,
+finally causing the event to take place.
+The event would cause the scheduler to be invoked a second time
+selecting the same process to run.
+The fix to this problem is to cancel any software reschedule
+events when saving a process context.
+This change doubles the speed with which processes
+can synchronize using pipes or signals.
+.NH 3
+Setjmp/Longjmp
+.PP
+The kernel routine \fIsetjmp\fP, that saves the current system
+context in preparation for a non-local goto, used to save many more
+registers than necessary under most circumstances.
+By trimming its operation to save only the minimum state required,
+the overhead for system calls decreased by an average of 13%.
+.NH 3
+Compensating for Lack of Compiler Technology
+.PP
+The current compilers available for C do not
+do any significant optimization.
+Good optimizing compilers are unlikely to be built;
+the C language is not well suited to optimization
+because of its rampant use of unbound pointers.
+Thus, many classical optimizations such as common subexpression
+analysis and selection of register variables must be done
+by hand using ``exterior'' knowledge of when such optimizations are safe.
+.PP
+Another optimization usually done by optimizing compilers
+is inline expansion of small or frequently used routines.
+In past Berkeley systems this has been done by using \fIsed\fP to
+run over the assembly language and replace calls to small
+routines with the code for the body of the routine, often
+a single VAX instruction.
+While this optimization eliminated the cost of the subroutine
+call and return,
+it did not eliminate the pushing and popping of several arguments
+to the routine.
+The \fIsed\fP script has been replaced by a more intelligent expander,
+\fIinline\fP, that merges the pushes and pops into moves to registers.
+For example, if the C code
+.DS
+if (scanc(map[i], 1, 47, i - 63))
+.DE
+is compiled into assembly language it generates the code shown
+in the left hand column of Table 11.
+The \fIsed\fP inline expander changes this code to that
+shown in the middle column.
+The newer optimizer eliminates most of the stack
+operations to generate the code shown in the right hand column.
+.KF
+.TS
+center, box;
+c s s s s s
+c s | c s | c s
+l l | l l | l l.
+Alternative C Language Code Optimizations
+_
+cc sed inline
+_
+subl3 $64,_i,\-(sp) subl3 $64,_i,\-(sp) subl3 $64,_i,r5
+pushl $47 pushl $47 movl $47,r4
+pushl $1 pushl $1 pushl $1
+mull2 $16,_i,r3 mull2 $16,_i,r3 mull2 $16,_i,r3
+pushl \-56(fp)[r3] pushl \-56(fp)[r3] movl \-56(fp)[r3],r2
+calls $4,_scanc movl (sp)+,r5 movl (sp)+,r3
+tstl r0 movl (sp)+,r4 scanc r2,(r3),(r4),r5
+jeql L7 movl (sp)+,r3 tstl r0
+ movl (sp)+,r2 jeql L7
+ scanc r2,(r3),(r4),r5
+ tstl r0
+ jeql L7
+.TE
+.ce
+Table 11. Alternative inline code expansions.
+.KE
+.PP
+Another optimization involved reevaluating
+existing data structures in the context of the current system.
+For example, disk buffer hashing was implemented when the system
+typically had thirty to fifty buffers.
+Most systems today have 200 to 1000 buffers.
+Consequently, most of the hash chains contained
+ten to a hundred buffers each!
+The running time of the low level buffer management primitives was
+dramatically improved simply by enlarging the size of the hash table.
+.NH 2
+Improvements to Libraries and Utilities
+.PP
+Intuitively, changes to the kernel would seem to have the greatest
+payoff since they affect all programs that run on the system.
+However, the kernel has been tuned many times before, so the
+opportunity for significant improvement was small.
+By contrast, many of the libraries and utilities had never been tuned.
+For example, we found utilities that spent 90% of their
+running time doing single character read system calls.
+Changing the utility to use the standard I/O library cut the
+running time by a factor of five!
+Thus, while most of our time has been spent tuning the kernel,
+more than half of the speedups are because of improvements in
+other parts of the system.
+Some of the more dramatic changes are described in the following
+subsections.
+.NH 3
+Hashed Databases
+.PP
+UNIX provides a set of database management routines, \fIdbm\fP,
+that can be used to speed lookups in large data files
+with an external hashed index file.
+The original version of dbm was designed to work with only one
+database at a time. These routines were generalized to handle
+multiple database files, enabling them to be used in rewrites
+of the password and host file lookup routines. The new routines
+used to access the password file significantly improve the running
+time of many important programs such as the mail subsystem,
+the C-shell (in doing tilde expansion), \fIls \-l\fP, etc.
+.NH 3
+Buffered I/O
+.PP
+The new filesystem with its larger block sizes allows better
+performance, but it is possible to degrade system performance
+by performing numerous small transfers rather than using
+appropriately-sized buffers.
+The standard I/O library
+automatically determines the optimal buffer size for each file.
+Some C library routines and commonly-used programs use low-level
+I/O or their own buffering, however.
+Several important utilities did not use the standard I/O library
+and were buffering I/O using the old optimal buffer size,
+1Kbytes; the programs were changed to buffer I/O according to the
+optimal file system blocksize.
+These include the editor, the assembler, loader, remote file copy,
+the text formatting programs, and the C compiler.
+.PP
+The standard error output has traditionally been unbuffered
+to prevent delay in presenting the output to the user,
+and to prevent it from being lost if buffers are not flushed.
+The inordinate expense of sending single-byte packets through
+the network led us to impose a buffering scheme on the standard
+error stream.
+Within a single call to \fIfprintf\fP, all output is buffered temporarily.
+Before the call returns, all output is flushed and the stream is again
+marked unbuffered.
+As before, the normal block or line buffering mechanisms can be used
+instead of the default behavior.
+.PP
+It is possible for programs with good intentions to unintentionally
+defeat the standard I/O library's choice of I/O buffer size by using
+the \fIsetbuf\fP call to assign an output buffer.
+Because of portability requirements, the default buffer size provided
+by \fIsetbuf\fP is 1024 bytes; this can lead, once again, to added
+overhead.
+One such program with this problem was \fIcat\fP;
+there are undoubtedly other standard system utilities with similar problems
+as the system has changed much since they were originally written.
+.NH 3
+Mail System
+.PP
+The problems discussed in section 3.1.1 prompted significant work
+on the entire mail system. The first problem identified was a bug
+in the \fIsyslog\fP program. The mail delivery program, \fIsendmail\fP,
+logs all mail transactions through this process with the 4.2BSD interprocess
+communication facilities. \fISyslog\fP then records the information in
+a log file. Unfortunately, \fIsyslog\fP was performing a \fIsync\fP
+operation after each message it received, whether it was logged to a file
+or not. This wreaked havoc on the effectiveness of the
+buffer cache and explained, to a large
+extent, why sending mail to large distribution lists generated such a
+heavy load on the system (one syslog message was generated for each
+message recipient causing almost a continuous sequence of sync operations).
+.PP
+The hashed data base files were
+installed in all mail programs, resulting in an order of magnitude
+speedup on large distribution lists. The code in \fI/bin/mail\fP
+that notifies the \fIcomsat\fP program when mail has been delivered to
+a user was changed to cache host table lookups, resulting in a similar
+speedup on large distribution lists.
+.PP
+Next, the file locking facilities
+provided in 4.2BSD, \fIflock\fP\|(2), were used in place of the old
+locking mechanism.
+The mail system previously used \fIlink\fP and \fIunlink\fP in
+implementing file locking primitives.
+Because these operations usually modify the contents of directories
+they require synchronous disk operations and cannot take
+advantage of the name cache maintained by the system.
+Unlink requires that the entry be found in the directory so that
+it can be removed;
+link requires that the directory be scanned to ensure that the name
+does not already exist.
+By contrast the advisory locking facility in 4.2BSD is
+efficient because it is all done with in-memory tables.
+Thus, the mail system was modified to use the file locking primitives.
+This yielded another 10% cut in the basic overhead of delivering mail.
+Extensive profiling and tuning of \fIsendmail\fP and
+compiling it without debugging code reduced the overhead by another 20%.
+.NH 3
+Network Servers
+.PP
+With the introduction of the network facilities in 4.2BSD,
+a myriad of services became available, each of which
+required its own daemon process.
+Many of these daemons were rarely if ever used,
+yet they lay asleep in the process table consuming
+system resources and generally slowing down response.
+Rather than having many servers started at boot time, a single server,
+\fIinetd\fP, was substituted.
+This process reads a simple configuration file
+that specifies the services the system is willing to support
+and listens for service requests on each service's Internet port.
+When a client requests service the appropriate server is created
+and passed a service connection as its standard input. Servers
+that require the identity of their client may use the \fIgetpeername\fP
+system call; likewise \fIgetsockname\fP may be used to find out
+a server's local address without consulting data base files.
+This scheme is attractive for several reasons:
+.IP \(bu 3
+it eliminates
+as many as a dozen processes, easing system overhead and
+allowing the file and text tables to be made smaller,
+.IP \(bu 3
+servers need not contain the code required to handle connection
+queueing, simplifying the programs, and
+.IP \(bu 3
+installing and replacing servers becomes simpler.
+.PP
+With an increased number of networks, both local and external to Berkeley,
+we found that the overhead of the routing process was becoming
+inordinately high.
+Several changes were made in the routing daemon to reduce this load.
+Routes to external networks are no longer exchanged by routers
+on the internal machines, only a route to a default gateway.
+This reduces the amount of network traffic and the time required
+to process routing messages.
+In addition, the routing daemon was profiled
+and functions responsible for large amounts
+of time were optimized.
+The major changes were a faster hashing scheme,
+and inline expansions of the ubiquitous byte-swapping functions.
+.PP
+Under certain circumstances, when output was blocked,
+attempts by the remote login process
+to send output to the user were rejected by the system,
+although a prior \fIselect\fP call had indicated that data could be sent.
+This resulted in continuous attempts to write the data until the remote
+user restarted output.
+This problem was initially avoided in the remote login handler,
+and the original problem in the kernel has since been corrected.
+.NH 3
+The C Run-time Library
+.PP
+Several people have found poorly tuned code
+in frequently used routines in the C library [Lankford84].
+In particular the running time of the string routines can be
+cut in half by rewriting them using the VAX string instructions.
+The memory allocation routines have been tuned to waste less
+memory for memory allocations with sizes that are a power of two.
+Certain library routines that did file input in one-character reads
+have been corrected.
+Other library routines including \fIfread\fP and \fIfwrite\fP
+have been rewritten for efficiency.
+.NH 3
+Csh
+.PP
+The C-shell was converted to run on 4.2BSD by
+writing a set of routines to simulate the old jobs library.
+While this provided a functioning C-shell,
+it was grossly inefficient, generating up
+to twenty system calls per prompt.
+The C-shell has been modified to use the new signal
+facilities directly,
+cutting the number of system calls per prompt in half.
+Additional tuning was done with the help of profiling
+to cut the cost of frequently used facilities.
diff --git a/share/doc/papers/sysperf/5.t b/share/doc/papers/sysperf/5.t
new file mode 100644
index 0000000..5d70a9a
--- /dev/null
+++ b/share/doc/papers/sysperf/5.t
@@ -0,0 +1,285 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)5.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Functional Extensions
+.NH
+Functional Extensions
+.PP
+Some of the facilities introduced in 4.2BSD were not completely
+implemented. An important part of the effort that went into
+4.3BSD was to clean up and unify both new and old facilities.
+.NH 2
+Kernel Extensions
+.PP
+A significant effort went into improving
+the networking part of the kernel.
+The work consisted of fixing bugs,
+tuning the algorithms,
+and revamping the lowest levels of the system
+to better handle heterogeneous network topologies.
+.NH 3
+Subnets, Broadcasts and Gateways
+.PP
+To allow sites to expand their network in an autonomous
+and orderly fashion, subnetworks have been introduced in 4.3BSD [GADS85].
+This facility allows sites to subdivide their local Internet address
+space into multiple subnetwork address spaces that are visible
+only by hosts at that site. To off-site hosts, machines on a site's
+subnetworks appear to reside on a single network. The routing daemon
+has been reworked to provide routing support in this type of
+environment.
+.PP
+The default Internet broadcast address is now specified with a host part
+of all ones, rather than all zeros.
+The broadcast address may be set at boot time on a per-interface basis.
+.NH 3
+Interface Addressing
+.PP
+The organization of network interfaces has been
+reworked to more cleanly support multiple
+network protocols. Network interfaces no longer
+contain a host's address on that network; instead
+each interface contains a pointer to a list of addresses
+assigned to that interface. This permits a single
+interface to support, for example, Internet protocols
+at the same time as XNS protocols.
+.PP
+The Address Resolution Protocol (ARP) support
+for 10 megabit/second Ethernet\(dg
+.FS
+\(dg Ethernet is a trademark of Xerox.
+.FE
+has been made more flexible by allowing hosts to
+act as a ``clearing house'' for hosts that do
+not support ARP. In addition, system managers have
+more control over the contents of the ARP translation
+cache and may interactively interrogate and modify
+the cache's contents.
+.NH 3
+User Control of Network Buffering
+.PP
+Although the system allocates reasonable default amounts of buffering
+for most connections, certain operations such as file system dumps
+to remote machines benefit from significant increases in buffering [Walsh84].
+The \fIsetsockopt\fP system call has been extended to allow such requests.
+In addition, \fIgetsockopt\fP and \fIsetsockopt\fP
+are now interfaced to the protocol level allowing protocol-specific
+options to be manipulated by the user.
+.NH 3
+Number of File Descriptors
+.PP
+To allow full use of the many descriptor based services available,
+the previous hard limit of 30 open files per process has been relaxed.
+The changes entailed generalizing \fIselect\fP to handle arrays of
+32-bit words, removing the dependency on file descriptors from
+the page table entries,
+and limiting most of the linear scans of a process's file table.
+The default per-process descriptor limit was raised from 20 to 64,
+though there are no longer any hard upper limits on the number
+of file descriptors.
+.NH 3
+Kernel Limits
+.PP
+Many internal kernel configuration limits have been increased by suitable
+modifications to data structures.
+The limit on physical memory has been changed from 8 megabytes to 64 megabytes,
+and the limit of 15 mounted file systems has been changed to 255.
+The maximum file system size has been increased to 8 gigabytes,
+number of processes to 65536,
+and per process size to 64 megabytes of data and 64 megabytes of stack.
+Note that these are upper bounds;
+the default limits for these quantities are tuned for systems
+with 4-8 megabytes of physical memory.
+.NH 3
+Memory Management
+.PP
+The global clock page replacement algorithm used to have a single
+hand that was used both to mark and to reclaim memory.
+The first time that it encountered a page it would clear its reference bit.
+If the reference bit was still clear on its next pass across the page,
+it would reclaim the page.
+The use of a single hand does not work well with large physical
+memories as the time to complete a single revolution of the hand
+can take up to a minute or more.
+By the time the hand gets around to the marked pages,
+the information is usually no longer pertinent.
+During periods of sudden shortages,
+the page daemon will not be able to find any reclaimable pages until
+it has completed a full revolution.
+To alleviate this problem,
+the clock hand has been split into two separate hands.
+The front hand clears the reference bits;
+the back hand follows a constant number of pages behind,
+reclaiming pages that still have cleared reference bits.
+While the code has been written to allow the distance between
+the hands to be varied, we have not found any algorithms
+suitable for determining how to dynamically adjust this distance.
+.PP
+The configuration of the virtual memory system used to require
+a significant understanding of its operation to do such
+simple tasks as increasing the maximum process size.
+This process has been significantly improved so that the most
+common configuration parameters, such as the virtual memory sizes,
+can be specified using a single option in the configuration file.
+Standard configurations support data and stack segments
+of 17, 33 and 64 megabytes.
+.NH 3
+Signals
+.PP
+The 4.2BSD signal implementation would push several words
+onto the normal run-time stack before switching to an
+alternate signal stack.
+The 4.3BSD implementation has been corrected so that
+the entire signal handler's state is now pushed onto the signal stack.
+Another limitation in the original signal implementation was
+that it used an undocumented system call to return from signals.
+Users could not write their own return from exceptions;
+4.3BSD formally specifies the \fIsigreturn\fP system call.
+.PP
+Many existing programs depend on interrupted system calls.
+The restartable system call semantics of 4.2BSD signals caused
+many of these programs to break.
+To simplify porting of programs from inferior versions of
+.UX
+the \fIsigvec\fP system call has been extended so that
+programmers may specify that system calls are not to be
+restarted after particular signals.
+.NH 3
+System Logging
+.PP
+A system logging facility has been added
+that sends kernel messages to the
+syslog daemon for logging in /usr/adm/messages and possibly for
+printing on the system console.
+The revised scheme for logging messages
+eliminates the time lag in updating the messages file,
+unifies the format of kernel messages,
+provides a finer granularity of control over the messages
+that get printed on the console,
+and eliminates the degradation in response during the printing of
+low-priority kernel messages.
+Recoverable system errors and common resource limitations are logged
+using this facility.
+Most system utilities, such as init and login,
+have been modified to log errors to syslog
+rather than writing directly on the console.
+.NH 3
+Windows
+.PP
+The tty structure has been augmented to hold
+information about the size
+of an associated window or terminal.
+These sizes can be obtained by programs such as editors that want
+to know the size of the screen they are manipulating.
+When these sizes are changed,
+a new signal, SIGWINCH, is sent to the current process group.
+The editors have been modified to catch this signal and reshape
+their view of the world, and the remote login program and server
+now cooperate to propagate window sizes and window size changes
+across a network.
+Other programs and libraries such as curses that need the width
+or height of the screen have been modified to use this facility as well.
+.NH 3
+Configuration of UNIBUS Devices
+.PP
+The UNIBUS configuration routines have been extended to allow auto-configuration
+of dedicated UNIBUS memory held by devices.
+The new routines simplify the configuration of memory-mapped devices
+and correct problems occurring on reset of the UNIBUS.
+.NH 3
+Disk Recovery from Errors
+.PP
+The MASSBUS disk driver's error recovery routines have been fixed to
+retry before correcting ECC errors, support ECC on bad-sector replacements,
+and correctly attempt retries after earlier
+corrective actions in the same transfer.
+The error messages are more accurate.
+.NH 2
+Functional Extensions to Libraries and Utilities
+.PP
+Most of the changes to the utilities and libraries have been to
+allow them to handle a more general set of problems,
+or to handle the same set of problems more quickly.
+.NH 3
+Name Server
+.PP
+In 4.2BSD the name resolution routines (\fIgethostbyname\fP,
+\fIgetservbyname\fP,
+etc.) were implemented by a set of database files maintained on the
+local machine.
+Inconsistencies or obsolescence in these files resulted in inaccessibility of
+hosts or services.
+In 4.3BSD these files may be replaced by a network name server that can
+ensure a consistent view of the name space in a multimachine environment.
+This name server operates in accordance with Internet standards
+for service on the ARPANET [Mockapetris83].
+.NH 3
+System Management
+.PP
+A new utility, \fIrdist\fP,
+has been provided to assist system managers in keeping
+all their machines up to date with a consistent set of sources and binaries.
+A master set of sources may reside on a single central machine,
+or be distributed at (known) locations throughout the environment.
+New versions of \fIgetty\fP, \fIinit\fP, and \fIlogin\fP
+merge the functions of several
+files into a single place, and allow more flexibility in the
+startup of processes such as window managers.
+.PP
+The new utility \fItimed\fP keeps the time on a group of cooperating machines
+(within a single LAN) synchronized to within 30 milliseconds.
+It does its corrections using a new system call that changes
+the rate of time advance without stopping or reversing the system clock.
+It normally selects one machine to act as a master.
+If the master dies or is partitioned, a new master is elected.
+Other machines may participate in a purely slave role.
+.NH 3
+Routing
+.PP
+Many bugs in the routing daemon have been fixed;
+it is considerably more robust,
+and now understands how to properly deal with
+subnets and point-to-point networks.
+Its operation has been made more efficient by tuning with the use
+of execution profiles, along with inline expansion of common operations
+using the kernel's \fIinline\fP optimizer.
+.NH 3
+Compilers
+.PP
+The symbolic debugger \fIdbx\fP has had many new features added,
+and all the known bugs fixed. In addition \fIdbx\fP
+has been extended to work with the Pascal compiler.
+The Fortran compiler \fIf77\fP has had numerous bugs fixed.
+The C compiler has been modified so that it can, optionally,
+generate single precision floating point instructions when operating
+on single precision variables.
diff --git a/share/doc/papers/sysperf/6.t b/share/doc/papers/sysperf/6.t
new file mode 100644
index 0000000..a445ee1
--- /dev/null
+++ b/share/doc/papers/sysperf/6.t
@@ -0,0 +1,70 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)6.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Security Tightening
+.NH
+Security Tightening
+.PP
+Since we do not wish to encourage rampant system cracking,
+we describe only briefly the changes made to enhance security.
+.NH 2
+Generic Kernel
+.PP
+Several loopholes in the process tracing facility have been corrected.
+Programs being traced may not be executed;
+executing programs may not be traced.
+Programs may not provide input to terminals to which they do not
+have read permission.
+The handling of process groups has been tightened to eliminate
+some problems.
+When a program attempts to change its process group,
+the system checks to see if the process with the pid of the process
+group was started by the same user.
+If it exists and was started by a different user the process group
+number change is denied.
+.NH 2
+Security Problems in Utilities
+.PP
+Setuid utilities no longer use the \fIpopen\fP or \fIsystem\fP library routines.
+Access to the kernel's data structures through the kmem device
+is now restricted to programs that are set group id ``kmem''.
+Thus many programs that used to run with root privileges
+no longer need to do so.
+Access to disk devices is now controlled by an ``operator'' group id;
+this permission allows operators to function without being the super-user.
+Only users in group wheel can do ``su root''; this restriction
+allows administrators to define a super-user access list.
+Numerous holes have been closed in the shell to prevent
+users from gaining privileges from set user id shell scripts,
+although use of such scripts is still highly discouraged on systems
+that are concerned about security.
diff --git a/share/doc/papers/sysperf/7.t b/share/doc/papers/sysperf/7.t
new file mode 100644
index 0000000..68f5717
--- /dev/null
+++ b/share/doc/papers/sysperf/7.t
@@ -0,0 +1,164 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)7.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Conclusions
+.NH
+Conclusions
+.PP
+4.2BSD, while functionally superior to 4.1BSD, lacked much of the
+performance tuning required of a good system. We found that
+the distributed system spent 10-20% more time in the kernel than
+4.1BSD. This added overhead combined with problems with several
+user programs severely limited the overall performance of the
+system in a general timesharing environment.
+.PP
+Changes made to the system since the 4.2BSD distribution have
+eliminated most of the
+added system overhead by replacing old algorithms
+or introducing additional caching schemes.
+The combined caches added to the name translation process
+reduce the average cost of translating a pathname to an inode by more than 50%.
+These changes reduce the percentage of time spent running
+in the system by nearly 9%.
+.PP
+The use of silo input on terminal ports only when necessary
+has allowed the system to avoid a large amount of software interrupt
+processing. Observations show that the system is forced to
+field about 25% fewer interrupts than before.
+.PP
+The kernel
+changes, combined with many bug fixes, make the system much more
+responsive in a general timesharing environment.
+The 4.3BSD Berkeley UNIX system now appears
+capable of supporting loads at least as large as those supported under
+4.1BSD while providing all the new interprocess communication, networking,
+and file system facilities.
+.nr H2 1
+.ds RH Acknowledgements
+.SH
+\s+2Acknowledgements\s0
+.PP
+We would like to thank Robert Elz for sharing his ideas and
+his code for caching system wide names and searching the process table.
+We thank Alan Smith for initially suggesting the use of a
+capability based cache.
+We also acknowledge
+George Goble who dropped many of our changes
+into his production system and reported back fixes to the
+disasters that they caused.
+The buffer cache read-ahead trace package was based
+on a program written by Jim Lawson. Ralph Campbell
+implemented several of the C library changes. The original
+version of the Internet daemon was written by Bill Joy.
+In addition,
+we would like to thank the many other people that contributed
+ideas, information, and work while the system was undergoing change.
+.ds RH References
+.nr H2 1
+.sp 2
+.SH
+\s+2References\s-2
+.LP
+.IP [Cabrera84] 20
+Luis Felipe Cabrera, Edward Hunter, Michael J. Karels, and David Mosher,
+``A User-Process Oriented Performance Study of Ethernet Networking Under
+Berkeley UNIX 4.2BSD,''
+Research Report No. UCB/CSD 84/217, University of California,
+Berkeley, December 1984.
+.IP [Cabrera85] 20
+Luis Felipe Cabrera, Michael J. Karels, and David Mosher,
+``The Impact of Buffer Management on Networking Software Performance
+in Berkeley UNIX 4.2BSD: A Case Study,''
+Proceedings of the Summer Usenix Conference, Portland, Oregon,
+June 1985, pp. 507-517.
+.IP [GADS85] 20
+GADS (Gateway Algorithms and Data Structures Task Force),
+``Toward an Internet Standard for Subnetting,'' RFC-940,
+Network Information Center, SRI International,
+April 1985.
+.IP [Joy80] 20
+Joy, William,
+``Comments on the performance of UNIX on the VAX'',
+Computer Systems Research Group, U.C. Berkeley,
+April 1980.
+.IP [Kashtan80] 20
+Kashtan, David L.,
+``UNIX and VMS, Some Performance Comparisons'',
+SRI International. February 1980.
+.IP [Lankford84] 20
+Jeffrey Lankford,
+``UNIX System V and 4BSD Performance,''
+\fIProceedings of the Salt Lake City Usenix Conference\fP,
+pp 228-236, June 1984.
+.IP [Leffler84] 20
+Sam Leffler, Mike Karels, and M. Kirk McKusick,
+``Measuring and Improving the Performance of 4.2BSD,''
+\fIProceedings of the Salt Lake City Usenix Conference\fP,
+pp 237-252, June 1984.
+.IP [McKusick85] 20
+M. Kirk McKusick, Mike Karels, and Samuel Leffler,
+``Performance Improvements and Functional Enhancements in 4.3BSD,''
+\fIProceedings of the Portland Usenix Conference\fP,
+pp 519-531, June 1985.
+.IP [Mockapetris83] 20
+Paul Mockapetris, ``Domain Names \- Implementation and Schedule,''
+Network Information Center, SRI International,
+RFC-883,
+November 1983.
+.IP [Mogul84] 20
+Jeffrey Mogul, ``Broadcasting Internet Datagrams,'' RFC-919,
+Network Information Center, SRI International,
+October 1984.
+.IP [Mosher80] 20
+Mosher, David,
+``UNIX Performance, an Introspection'',
+Presented at the Boulder, Colorado Usenix Conference, January 1980.
+Copies of the paper are available from
+Computer Systems Research Group, U.C. Berkeley.
+.IP [Nagle84] 20
+John Nagle, ``Congestion Control in IP/TCP Internetworks,'' RFC-896,
+Network Information Center, SRI International,
+January 1984.
+.IP [Ritchie74] 20
+Ritchie, D. M. and Thompson, K.,
+``The UNIX Time-Sharing System'',
+CACM 17, 7. July 1974. pp 365-375.
+.IP [Shannon83] 20
+Shannon, W.,
+private communication,
+July 1983.
+.IP [Walsh84] 20
+Robert Walsh and Robert Gurwitz,
+``Converting BBN TCP/IP to 4.2BSD,''
+\fIProceedings of the Salt Lake City Usenix Conference\fP,
+pp 52-61, June 1984.
diff --git a/share/doc/papers/sysperf/Makefile b/share/doc/papers/sysperf/Makefile
new file mode 100644
index 0000000..b65852bf
--- /dev/null
+++ b/share/doc/papers/sysperf/Makefile
@@ -0,0 +1,22 @@
+# @(#)Makefile 1.6 (Berkeley) 6/8/93
+
+DIR= papers/sysperf
+MACROS= -ms
+SRCS= 0.t 1.t 2.t 3.t 4.t 5.t 6.t 7.t
+EXTRA= a1.t a2.t
+OBJS= paper.tmp appendix.tmp
+CLEANFILES+=${OBJS}
+
+paper.ps: ${OBJS}
+ ${ROFF} ${OBJS} > ${.TARGET}
+
+paper.tmp: ${SRCS}
+ ${TBL} ${SRCS} | ${EQN} > paper.tmp
+
+appendix.tmp: a1.t a2.t
+ ${GRIND} -f a1.t | awk '/\.\(\)/{ cnt = 2 } \
+ { if (cnt) cnt -= 1; else print $$0; } ' > appendix.tmp
+ ${GRIND} -f -lcsh a2.t | awk '/\.\(\)/{ cnt = 2 } \
+ { if (cnt) cnt -= 1; else print $$0; } ' >> appendix.tmp
+
+.include <bsd.doc.mk>
diff --git a/share/doc/papers/sysperf/a1.t b/share/doc/papers/sysperf/a1.t
new file mode 100644
index 0000000..b94f6aa
--- /dev/null
+++ b/share/doc/papers/sysperf/a1.t
@@ -0,0 +1,668 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)a1.t 5.1 (Berkeley) 4/17/91
+.\"
+.ds RH Appendix A \- Benchmark sources
+.nr H2 1
+.sp 2
+.de vS
+.nf
+..
+.de vE
+.fi
+..
+.bp
+.SH
+\s+2Appendix A \- Benchmark sources\s-2
+.LP
+The programs shown here run under 4.2 with only routines
+from the standard libraries. When run under 4.1 they were augmented
+with a \fIgetpagesize\fP routine and a copy of the \fIrandom\fP
+function from the C library. The \fIvforks\fP and \fIvexecs\fP
+programs are constructed from the \fIforks\fP and \fIexecs\fP programs,
+respectively, by substituting calls to \fIfork\fP with calls to
+\fIvfork\fP.
+.SH
+syscall
+.LP
+.vS
+/*
+ * System call overhead benchmark.
+ */
+main(argc, argv)
+ char *argv[];
+{
+ register int ncalls;
+
+ if (argc < 2) {
+ printf("usage: %s #syscalls\n", argv[0]);
+ exit(1);
+ }
+ ncalls = atoi(argv[1]);
+ while (ncalls-- > 0)
+ (void) getpid();
+}
+.vE
+.SH
+csw
+.LP
+.vS
+/*
+ * Context switching benchmark.
+ *
+ * Force system to context switch 2*nsigs
+ * times by forking and exchanging signals.
+ * To calculate system overhead for a context
+ * switch, the signocsw program must be run
+ * with nsigs. Overhead is then estimated by
+ * t1 = time csw <n>
+ * t2 = time signocsw <n>
+ * overhead = t1 - 2 * t2;
+ */
+#include <signal.h>
+
+int sigsub();
+int otherpid;
+int nsigs;
+
+main(argc, argv)
+ char *argv[];
+{
+ int pid;
+
+ if (argc < 2) {
+ printf("usage: %s nsignals\n", argv[0]);
+ exit(1);
+ }
+ nsigs = atoi(argv[1]);
+ signal(SIGALRM, sigsub);
+ otherpid = getpid();
+ pid = fork();
+ if (pid != 0) {
+ otherpid = pid;
+ kill(otherpid, SIGALRM);
+ }
+ for (;;)
+ sigpause(0);
+}
+
+sigsub()
+{
+
+ signal(SIGALRM, sigsub);
+ kill(otherpid, SIGALRM);
+ if (--nsigs <= 0)
+ exit(0);
+}
+.vE
+.SH
+signocsw
+.LP
+.vS
+/*
+ * Signal without context switch benchmark.
+ */
+#include <signal.h>
+
+int pid;
+int nsigs;
+int sigsub();
+
+main(argc, argv)
+ char *argv[];
+{
+ register int i;
+
+ if (argc < 2) {
+ printf("usage: %s nsignals\n", argv[0]);
+ exit(1);
+ }
+ nsigs = atoi(argv[1]);
+ signal(SIGALRM, sigsub);
+ pid = getpid();
+ for (i = 0; i < nsigs; i++)
+ kill(pid, SIGALRM);
+}
+
+sigsub()
+{
+
+ signal(SIGALRM, sigsub);
+}
+.vE
+.SH
+pipeself
+.LP
+.vS
+/*
+ * IPC benchmark,
+ * write to self using pipes.
+ */
+
+main(argc, argv)
+ char *argv[];
+{
+ char buf[512];
+ int fd[2], msgsize;
+ register int i, iter;
+
+ if (argc < 3) {
+ printf("usage: %s iterations message-size\n", argv[0]);
+ exit(1);
+ }
+ argc--, argv++;
+ iter = atoi(*argv);
+ argc--, argv++;
+ msgsize = atoi(*argv);
+ if (msgsize > sizeof (buf) || msgsize <= 0) {
+ printf("%s: Bad message size.\n", *argv);
+ exit(2);
+ }
+ if (pipe(fd) < 0) {
+ perror("pipe");
+ exit(3);
+ }
+ for (i = 0; i < iter; i++) {
+ write(fd[1], buf, msgsize);
+ read(fd[0], buf, msgsize);
+ }
+}
+.vE
+.SH
+pipediscard
+.LP
+.vS
+/*
+ * IPC benchmark,
+ * write and discard using pipes.
+ */
+
+main(argc, argv)
+ char *argv[];
+{
+ char buf[512];
+ int fd[2], msgsize;
+ register int i, iter;
+
+ if (argc < 3) {
+ printf("usage: %s iterations message-size\n", argv[0]);
+ exit(1);
+ }
+ argc--, argv++;
+ iter = atoi(*argv);
+ argc--, argv++;
+ msgsize = atoi(*argv);
+ if (msgsize > sizeof (buf) || msgsize <= 0) {
+ printf("%s: Bad message size.\n", *argv);
+ exit(2);
+ }
+ if (pipe(fd) < 0) {
+ perror("pipe");
+ exit(3);
+ }
+ if (fork() == 0)
+ for (i = 0; i < iter; i++)
+ read(fd[0], buf, msgsize);
+ else
+ for (i = 0; i < iter; i++)
+ write(fd[1], buf, msgsize);
+}
+.vE
+.SH
+pipeback
+.LP
+.vS
+/*
+ * IPC benchmark,
+ * read and reply using pipes.
+ *
+ * Process forks and exchanges messages
+ * over a pipe in a request-response fashion.
+ */
+
+main(argc, argv)
+ char *argv[];
+{
+ char buf[512];
+ int fd[2], fd2[2], msgsize;
+ register int i, iter;
+
+ if (argc < 3) {
+ printf("usage: %s iterations message-size\n", argv[0]);
+ exit(1);
+ }
+ argc--, argv++;
+ iter = atoi(*argv);
+ argc--, argv++;
+ msgsize = atoi(*argv);
+ if (msgsize > sizeof (buf) || msgsize <= 0) {
+ printf("%s: Bad message size.\n", *argv);
+ exit(2);
+ }
+ if (pipe(fd) < 0) {
+ perror("pipe");
+ exit(3);
+ }
+ if (pipe(fd2) < 0) {
+ perror("pipe");
+ exit(3);
+ }
+ if (fork() == 0)
+ for (i = 0; i < iter; i++) {
+ read(fd[0], buf, msgsize);
+ write(fd2[1], buf, msgsize);
+ }
+ else
+ for (i = 0; i < iter; i++) {
+ write(fd[1], buf, msgsize);
+ read(fd2[0], buf, msgsize);
+ }
+}
+.vE
+.SH
+forks
+.LP
+.vS
+/*
+ * Benchmark program to calculate fork+wait
+ * overhead (approximately). Process
+ * forks and exits while parent waits.
+ * The time to run this program is used
+ * in calculating exec overhead.
+ */
+
+main(argc, argv)
+ char *argv[];
+{
+ register int nforks, i;
+ char *cp;
+ int pid, child, status, brksize;
+
+ if (argc < 2) {
+ printf("usage: %s number-of-forks sbrk-size\n", argv[0]);
+ exit(1);
+ }
+ nforks = atoi(argv[1]);
+ if (nforks < 0) {
+ printf("%s: bad number of forks\n", argv[1]);
+ exit(2);
+ }
+ brksize = atoi(argv[2]);
+ if (brksize < 0) {
+ printf("%s: bad size to sbrk\n", argv[2]);
+ exit(3);
+ }
+ cp = (char *)sbrk(brksize);
+ if ((int)cp == -1) {
+ perror("sbrk");
+ exit(4);
+ }
+ for (i = 0; i < brksize; i += 1024)
+ cp[i] = i;
+ while (nforks-- > 0) {
+ child = fork();
+ if (child == -1) {
+ perror("fork");
+ exit(-1);
+ }
+ if (child == 0)
+ _exit(-1);
+ while ((pid = wait(&status)) != -1 && pid != child)
+ ;
+ }
+ exit(0);
+}
+.vE
+.SH
+execs
+.LP
+.vS
+/*
+ * Benchmark program to calculate exec
+ * overhead (approximately). Process
+ * forks and execs "null" test program.
+ * The time to run the fork program should
+ * then be deducted from this one to
+ * estimate the overhead for the exec.
+ */
+
+main(argc, argv) /* arguments: number-of-execs sbrk-size job-name */
+ char *argv[];
+{
+ register int nexecs, i;
+ char *cp, *sbrk();
+ int pid, child, status, brksize;
+
+ if (argc < 4) { /* argv[1], argv[2] and argv[3] are all read below */
+ printf("usage: %s number-of-execs sbrk-size job-name\n",
+ argv[0]);
+ exit(1);
+ }
+ nexecs = atoi(argv[1]);
+ if (nexecs < 0) {
+ printf("%s: bad number of execs\n", argv[1]);
+ exit(2);
+ }
+ brksize = atoi(argv[2]);
+ if (brksize < 0) {
+ printf("%s: bad size to sbrk\n", argv[2]);
+ exit(3);
+ }
+ cp = sbrk(brksize); /* enlarge the process before forking */
+ if ((int)cp == -1) {
+ perror("sbrk");
+ exit(4);
+ }
+ for (i = 0; i < brksize; i += 1024) /* store one byte per 1024 */
+ cp[i] = i;
+ while (nexecs-- > 0) { /* one child at a time */
+ child = fork();
+ if (child == -1) {
+ perror("fork");
+ exit(-1);
+ }
+ if (child == 0) {
+ execv(argv[3], argv); /* job-name gets this program's own argv */
+ perror("execv"); /* reached only if execv returns */
+ _exit(-1);
+ }
+ while ((pid = wait(&status)) != -1 && pid != child) /* reap it */
+ ;
+ }
+ exit(0);
+}
+.vE
+.SH
+nulljob
+.LP
+.vS
+/*
+ * Benchmark "null job" program.
+ */
+
+main(argc, argv) /* arguments are ignored */
+ char *argv[];
+{
+
+ exit(0); /* no work at all: exit successfully at once */
+}
+.vE
+.SH
+bigjob
+.LP
+.vS
+/*
+ * Benchmark "null big job" program.
+ */
+/* 250 here is intended to approximate vi's text+data size */
+char space[1024 * 250] = "force into data segment"; /* 250K, initialized */
+
+main(argc, argv) /* arguments are ignored */
+ char *argv[];
+{
+
+ exit(0); /* space is never referenced; exit successfully at once */
+}
+.vE
+.bp
+.SH
+seqpage
+.LP
+.vS
+/*
+ * Sequential page access benchmark.
+ */
+#include <sys/vadvise.h>
+
+char *valloc();
+
+main(argc, argv) /* arguments: [ -v ] [ -p #pages ] niter */
+ char *argv[];
+{
+ register i, niter; /* implicit int */
+ register char *pf, *lastpage;
+ int npages = 4096, pagesize, vflag = 0; /* default: 4096 pages, no -v */
+ char *pages, *name;
+
+ name = argv[0]; /* kept for the usage message */
+ argc--, argv++;
+again: /* re-entered after each option is consumed */
+ if (argc < 1) {
+usage:
+ printf("usage: %s [ -v ] [ -p #pages ] niter\n", name);
+ exit(1);
+ }
+ if (strcmp(*argv, "-p") == 0) {
+ argc--, argv++;
+ if (argc < 1) /* -p given without a count */
+ goto usage;
+ npages = atoi(*argv);
+ if (npages <= 0) {
+ printf("%s: Bad page count.\n", *argv);
+ exit(2);
+ }
+ argc--, argv++;
+ goto again;
+ }
+ if (strcmp(*argv, "-v") == 0) {
+ argc--, argv++;
+ vflag++;
+ goto again;
+ }
+ niter = atoi(*argv); /* first non-option argument */
+ pagesize = getpagesize();
+ pages = valloc(npages * pagesize);
+ if (pages == (char *)0) {
+ printf("Can't allocate %d pages (%2.1f megabytes).\n",
+ npages, (npages * pagesize) / (1024. * 1024.));
+ exit(3);
+ }
+ lastpage = pages + (npages * pagesize); /* one past the end of the region */
+ if (vflag)
+ vadvise(VA_SEQL); /* presumably a sequential-access hint -- confirm */
+ for (i = 0; i < niter; i++) /* niter passes, lowest address first */
+ for (pf = pages; pf < lastpage; pf += pagesize)
+ *pf = 1; /* store one byte at the start of each page */
+}
+.vE
+.SH
+randpage
+.LP
+.vS
+/*
+ * Random page access benchmark.
+ */
+#include <sys/vadvise.h>
+
+char *valloc();
+long random(); /* the generator main() actually calls; returns long */
+
+main(argc, argv) /* arguments: [ -d ] [ -v ] [ -p #pages ] niter */
+ char *argv[];
+{
+ register int npages = 4096, pagesize, pn, i, niter;
+ int vflag = 0, debug = 0;
+ char *pages, *name;
+
+ name = argv[0]; /* kept for the usage message */
+ argc--, argv++;
+again: /* re-entered after each option is consumed */
+ if (argc < 1) {
+usage:
+ printf("usage: %s [ -d ] [ -v ] [ -p #pages ] niter\n", name);
+ exit(1);
+ }
+ if (strcmp(*argv, "-p") == 0) {
+ argc--, argv++;
+ if (argc < 1) /* -p given without a count */
+ goto usage;
+ npages = atoi(*argv);
+ if (npages <= 0) {
+ printf("%s: Bad page count.\n", *argv);
+ exit(2);
+ }
+ argc--, argv++;
+ goto again;
+ }
+ if (strcmp(*argv, "-v") == 0) {
+ argc--, argv++;
+ vflag++;
+ goto again;
+ }
+ if (strcmp(*argv, "-d") == 0) {
+ argc--, argv++;
+ debug++;
+ goto again;
+ }
+ niter = atoi(*argv); /* first non-option argument */
+ pagesize = getpagesize();
+ pages = valloc(npages * pagesize);
+ if (pages == (char *)0) {
+ printf("Can't allocate %d pages (%2.1f megabytes).\n",
+ npages, (npages * pagesize) / (1024. * 1024.));
+ exit(3);
+ }
+ if (vflag)
+ vadvise(VA_ANOM); /* presumably an anomalous-access hint -- confirm */
+ for (i = 0; i < niter; i++) {
+ pn = random() % npages; /* page index in 0..npages-1 */
+ if (debug)
+ printf("touch page %d\n", pn);
+ pages[pagesize * pn] = 1; /* store one byte at the start of page pn */
+ }
+}
+.vE
+.SH
+gausspage
+.LP
+.vS
+/*
+ * Random page access with
+ * a gaussian distribution.
+ *
+ * Allocate a large (zero fill on demand) address
+ * space and fault the pages in a random gaussian
+ * order.
+ */
+
+float sqrt(), log(), rnd(), cos(), gauss();
+char *valloc();
+int rand();
+
+main(argc, argv) /* arguments: [ -d ] [ -p #pages ] [ -s sd ] iterations */
+ char *argv[];
+{
+ register int pn, i, niter, delta;
+ register char *pages;
+ float sd = 10.0; /* default standard deviation */
+ int npages = 4096, pagesize, debug = 0;
+ char *name;
+
+ name = argv[0]; /* kept for the usage message */
+ argc--, argv++;
+again: /* re-entered after each option is consumed */
+ if (argc < 1) {
+usage:
+ printf(
+"usage: %s [ -d ] [ -p #pages ] [ -s standard-deviation ] iterations\n", name);
+ exit(1);
+ }
+ if (strcmp(*argv, "-s") == 0) {
+ argc--, argv++;
+ if (argc < 1) /* -s given without a value */
+ goto usage;
+ sscanf(*argv, "%f", &sd);
+ if (sd <= 0) {
+ printf("%s: Bad standard deviation.\n", *argv);
+ exit(2);
+ }
+ argc--, argv++;
+ goto again;
+ }
+ if (strcmp(*argv, "-p") == 0) {
+ argc--, argv++;
+ if (argc < 1) /* -p given without a count */
+ goto usage;
+ npages = atoi(*argv);
+ if (npages <= 0) {
+ printf("%s: Bad page count.\n", *argv);
+ exit(2);
+ }
+ argc--, argv++;
+ goto again;
+ }
+ if (strcmp(*argv, "-d") == 0) {
+ argc--, argv++;
+ debug++;
+ goto again;
+ }
+ niter = atoi(*argv); /* first non-option argument */
+ pagesize = getpagesize();
+ pages = valloc(npages*pagesize);
+ if (pages == (char *)0) {
+ printf("Can't allocate %d pages (%2.1f megabytes).\n",
+ npages, (npages*pagesize) / (1024. * 1024.));
+ exit(3);
+ }
+ pn = 0; /* the walk starts at page 0 */
+ for (i = 0; i < niter; i++) {
+ delta = gauss(sd, 0.0); /* step, truncated to an int */
+ while (pn + delta < 0 || pn + delta >= npages) /* valid: 0..npages-1 */
+ delta = gauss(sd, 0.0);
+ pn += delta;
+ if (debug)
+ printf("touch page %d\n", pn);
+ else /* with -d the page is only reported, not touched */
+ pages[pn * pagesize] = 1;
+ }
+}
+
+float
+gauss(sd, mean) /* random deviate scaled by sd and offset by mean */
+ float sd, mean;
+{
+ register float qa, qb;
+
+ qa = sqrt(log(rnd()) * -2.0); /* NOTE(review): log(0) if rnd() yields 0 */
+ qb = 3.14159 * rnd(); /* angle; 0..pi assuming rnd() is in 0..1 */
+ return (qa * cos(qb) * sd + mean);
+}
+
+float
+rnd() /* rand() result divided by 0x7fffffff */
+{
+ static int seed = 1; /* only ever passed to rand() below */
+ static int biggest = 0x7fffffff; /* assumes rand() <= 0x7fffffff -- confirm */
+
+ return ((float)rand(seed) / (float)biggest); /* NOTE(review): standard rand() takes no argument */
+}
+.vE
diff --git a/share/doc/papers/sysperf/a2.t b/share/doc/papers/sysperf/a2.t
new file mode 100644
index 0000000..e1882cf
--- /dev/null
+++ b/share/doc/papers/sysperf/a2.t
@@ -0,0 +1,117 @@
+.\" Copyright (c) 1985 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)a2.t 5.1 (Berkeley) 4/17/91
+.\"
+.SH
+run (shell script)
+.LP
+.vS
+#! /bin/csh -fx
+# Script to run benchmark programs.
+#
+date
+make clean; time make # rebuild the benchmarks, timing the build
+time syscall 100000
+time seqpage -p 7500 10 # 10 passes over 7500 pages
+time seqpage -v -p 7500 10 # same, with the vadvise call enabled
+time randpage -p 7500 30000 # 30000 touches among 7500 pages
+time randpage -v -p 7500 30000 # same, with the vadvise call enabled
+time gausspage -p 7500 -s 1 30000 # -s is the standard deviation
+time gausspage -p 7500 -s 10 30000
+time gausspage -p 7500 -s 30 30000
+time gausspage -p 7500 -s 40 30000
+time gausspage -p 7500 -s 50 30000
+time gausspage -p 7500 -s 60 30000
+time gausspage -p 7500 -s 80 30000
+time gausspage -p 7500 -s 10000 30000
+time csw 10000
+time signocsw 10000
+time pipeself 10000 512
+time pipeself 10000 4
+time udgself 10000 512
+time udgself 10000 4
+time pipediscard 10000 512
+time pipediscard 10000 4
+time udgdiscard 10000 512
+time udgdiscard 10000 4
+time pipeback 10000 512
+time pipeback 10000 4
+time udgback 10000 512
+time udgback 10000 4
+size forks
+time forks 1000 0 # arguments: number-of-forks sbrk-size
+time forks 1000 1024
+time forks 1000 102400
+size vforks
+time vforks 1000 0
+time vforks 1000 1024
+time vforks 1000 102400
+countenv # countenv is not shown here; presumably reports environment size
+size nulljob
+time execs 1000 0 nulljob # arguments: number-of-execs sbrk-size job-name
+time execs 1000 1024 nulljob
+time execs 1000 102400 nulljob
+time vexecs 1000 0 nulljob
+time vexecs 1000 1024 nulljob
+time vexecs 1000 102400 nulljob
+size bigjob
+time execs 1000 0 bigjob
+time execs 1000 1024 bigjob
+time execs 1000 102400 bigjob
+time vexecs 1000 0 bigjob
+time vexecs 1000 1024 bigjob
+time vexecs 1000 102400 bigjob
+# fill environment with ~1024 bytes
+setenv a 012345678901234567890123456789012345678901234567890123456780123456789
+setenv b 012345678901234567890123456789012345678901234567890123456780123456789
+setenv c 012345678901234567890123456789012345678901234567890123456780123456789
+setenv d 012345678901234567890123456789012345678901234567890123456780123456789
+setenv e 012345678901234567890123456789012345678901234567890123456780123456789
+setenv f 012345678901234567890123456789012345678901234567890123456780123456789
+setenv g 012345678901234567890123456789012345678901234567890123456780123456789
+setenv h 012345678901234567890123456789012345678901234567890123456780123456789
+setenv i 012345678901234567890123456789012345678901234567890123456780123456789
+setenv j 012345678901234567890123456789012345678901234567890123456780123456789
+setenv k 012345678901234567890123456789012345678901234567890123456780123456789
+setenv l 012345678901234567890123456789012345678901234567890123456780123456789
+setenv m 012345678901234567890123456789012345678901234567890123456780123456789
+setenv n 012345678901234567890123456789012345678901234567890123456780123456789
+setenv o 012345678901234567890123456789012345678901234567890123456780123456789
+countenv # run again now that fifteen variables have been added
+time execs 1000 0 nulljob # repeat the exec runs with the larger environment
+time execs 1000 1024 nulljob
+time execs 1000 102400 nulljob
+time execs 1000 0 bigjob
+time execs 1000 1024 bigjob
+time execs 1000 102400 bigjob
+.vE
+.bp
diff --git a/share/doc/psd/00.contents b/share/doc/psd/00.contents
new file mode 100644
index 0000000..3866526
--- /dev/null
+++ b/share/doc/psd/00.contents
@@ -0,0 +1,191 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)00.contents 8.1 (Berkeley) 6/8/93
+.\"
+.OH '''PSD Contents'
+.EH 'PSD Contents'''
+.TL
+UNIX Programmer's Supplementary Documents (PSD)
+.sp
+\s-24.4 Berkeley Software Distribution\s+2
+.sp
+\fRJune, 1993\fR
+.PP
+This volume contains documents which supplement the manual pages in
+.I
+The
+.UX
+Programmer's Reference Manual
+.R
+for the 4.4BSD system as distributed by U.C. Berkeley.
+.SH
+Documents of Historical Interest
+.IP
+.tl 'The Unix Time\-Sharing System''PSD:1'
+.QP
+Dennis Ritchie and Ken Thompson's original paper about UNIX, reprinted
+from Communications of the ACM.
+
+.IP
+.tl 'Unix Implementation''PSD:2'
+.QP
+Ken Thompson's description of the implementation of the Version 7
+kernel and file system.
+
+.IP
+.tl 'The Unix I/O System''PSD:3'
+.QP
+Dennis Ritchie's overview of the I/O System of Version 7; still helpful for
+those writing device drivers.
+
+.IP
+.tl 'Unix Programming \- Second Edition ''PSD:4'
+.QP
+Describes the programming interface to the UNIX version 7 operating
+system and the standard I/O library. Should be supplemented by
+Kernighan and Pike, ``The UNIX Programming Environment'',
+Prentice-Hall, 1984, and especially by the Programmer's Reference Manual,
+sections 2 (system calls) and 3 (library routines).
+
+.IP
+.tl 'Berkeley Software Architecture Manual (4.4 Edition)''PSD:5'
+.QP
+A concise and terse description of the system call interface
+provided in Berkeley Unix, as revised for 4.4BSD.
+This will never be a best seller.
+
+.SH
+Languages in common use
+.IP
+.tl 'The C Programming Language \- Reference Manual''PSD:6'
+.QP
+Official statement of the syntax of C.
+Should be supplemented by ``The C Programming Language,''
+B.W. Kernighan and D.M. Ritchie, Prentice-Hall, 1978, that
+contains a tutorial introduction and many examples.
+
+.IP
+.tl 'Berkeley Pascal User\'s Manual''PSD:7'
+.QP
+An implementation of this language popular for learning to program.
+
+.IP
+.tl 'A Portable Fortran 77 Compiler''PSD:8'
+.QP
+A revised version of the document which originally appeared in
+Volume 2b of the Bell Labs documentation;
+this version reflects the work done at Berkeley.
+
+.IP
+.tl 'Introduction to the f77 I/O Library''PSD:9'
+.QP
+A description of the revised input/output library for Fortran 77,
+reflecting work carried out at Berkeley.
+
+.SH
+Programming Tools
+.IP
+.tl 'Debugging with GDB: The GNU Source-Level Debugger''PSD:10'
+.QP
+How to debug programs using the source level \fIgdb\fP debugger
+(or how to debug programs without having to know much about machine language).
+
+.IP
+.tl 'A Tutorial Introduction to ADB''PSD:11'
+.QP
+How to debug programs using the assembly-language level \fIadb\fP debugger.
+
+.IP
+.tl 'Make \- A Program for Maintaining Computer Programs''PSD:12'
+.QP
+Indispensable tool for making sure large programs are properly
+compiled with minimal effort.
+
+.IP
+.tl 'An Introduction to the Revision Control System''PSD:13'
+.QP
+RCS is a user-contributed tool for working together with other people
+without stepping on each other's toes.
+An alternative to \fIsccs\fR for controlling software changes.
+
+.IP
+.tl 'An Introduction to the Source Code Control System''PSD:14'
+.QP
+A useful introductory article for those users with
+installations licensed for SCCS.
+
+.IP
+.tl 'YACC: Yet Another Compiler-Compiler''PSD:15'
+.QP
+Converts a BNF specification of a language and semantic actions
+written in C into a compiler for that language.
+
+.IP
+.tl 'LEX \- A Lexical Analyzer Generator''PSD:16'
+.QP
+Creates a recognizer for a set of regular expressions:
+each regular expression can be followed by arbitrary C code
+to be executed upon finding the regular expression.
+
+.IP
+.tl 'The M4 Macro Processor''PSD:17'
+.QP
+M4 is a macro processor useful in its own right and as a
+front-end for C, Ratfor, and Cobol.
+
+.IP
+.tl 'gprof: a Call Graph Execution Profiler''PSD:18'
+.QP
+A program to show the call graph and execution time of a program.
+Indispensable aid for improving the running time of almost everything.
+
+.SH
+Programming Libraries
+.IP
+.tl 'Screen Updating and Cursor Movement Optimization''PSD:19'
+.QP
+Describes the \fIcurses\fP package, an aid for writing screen-oriented,
+terminal-independent programs.
+
+.SH
+General Reference
+.IP
+.tl 'An Introductory 4.4BSD Interprocess Communication Tutorial''PSD:20'
+.QP
+How to write programs that use the Interprocess Communication Facilities
+of 4.4BSD.
+
+.IP
+.tl 'An Advanced 4.4BSD Interprocess Communication Tutorial''PSD:21'
+.QP
+The reference document (with some examples) for the Interprocess Communication
+Facilities of 4.4BSD.
diff --git a/share/doc/psd/05.sysman/0.t b/share/doc/psd/05.sysman/0.t
new file mode 100644
index 0000000..865e8ff
--- /dev/null
+++ b/share/doc/psd/05.sysman/0.t
@@ -0,0 +1,292 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 6/8/93
+.\"
+.if n .ND
+.TL
+Berkeley Software Architecture Manual
+.br
+4.4BSD Edition
+.AU
+William Joy, Robert Fabry,
+.AU
+Samuel Leffler, M. Kirk McKusick,
+.AU
+Michael Karels
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, CA 94720
+.EH 'PSD:5-%''4.4BSD Architecture Manual'
+.OH '4.4BSD Architecture Manual''PSD:5-%'
+.AB
+.FS
+* UNIX is a trademark of Bell Laboratories.
+.FE
+This document summarizes the facilities
+provided by the 4.4BSD version of the UNIX\|* operating system.
+It does not attempt to act as a tutorial for use of the system
+nor does it attempt to explain or justify the design of the
+system facilities.
+It gives neither motivation nor implementation details,
+in favor of brevity.
+.PP
+The first section describes the basic kernel functions
+provided to a UNIX process: process naming and protection,
+memory management, software interrupts,
+object references (descriptors), time and statistics functions,
+and resource controls.
+These facilities, as well as facilities for
+bootstrap, shutdown and process accounting,
+are provided solely by the kernel.
+.PP
+The second section describes the standard system
+abstractions for
+files and file systems,
+communication,
+terminal handling,
+and process control and debugging.
+These facilities are implemented by the operating system or by
+network server processes.
+.AE
+.LP
+.bp
+.ft B
+.br
+.sv 2
+.ce
+TABLE OF CONTENTS
+.ft R
+.LP
+.sp 1
+.nf
+.B "Introduction."
+.LP
+.if t .sp .5v
+.nf
+.B "0. Notation and types"
+.LP
+.if t .sp .5v
+.nf
+.B "1. Kernel primitives"
+.LP
+.if t .sp .5v
+.nf
+.nf
+\fB1.1. Processes and protection\fP
+1.1.1. Host and process identifiers
+1.1.2. Process creation and termination
+1.1.3. User and group ids
+1.1.4. Process groups
+.LP
+.nf
+\fB1.2. Memory management\fP
+1.2.1. Text, data and stack
+1.2.2. Mapping pages
+1.2.3. Page protection control
+1.2.4. Giving and getting advice
+1.2.5. Protection primitives
+.LP
+.if t .sp .5v
+.nf
+\fB1.3. Signals\fP
+1.3.1. Overview
+1.3.2. Signal types
+1.3.3. Signal handlers
+1.3.4. Sending signals
+1.3.5. Protecting critical sections
+1.3.6. Signal stacks
+.LP
+.if t .sp .5v
+.nf
+\fB1.4. Timing and statistics\fP
+1.4.1. Real time
+1.4.2. Interval time
+.LP
+.if t .sp .5v
+.nf
+\fB1.5. Descriptors\fP
+1.5.1. The reference table
+1.5.2. Descriptor properties
+1.5.3. Managing descriptor references
+1.5.4. Multiplexing requests
+1.5.5. Descriptor wrapping
+.LP
+.if t .sp .5v
+.nf
+\fB1.6. Resource controls\fP
+1.6.1. Process priorities
+1.6.2. Resource utilization
+1.6.3. Resource limits
+.LP
+.if t .sp .5v
+.nf
+\fB1.7. System operation support\fP
+1.7.1. Bootstrap operations
+1.7.2. Shutdown operations
+1.7.3. Accounting
+.bp
+.LP
+.if t .sp .5v
+.sp 1
+.nf
+\fB2. System facilities\fP
+.LP
+.if t .sp .5v
+.nf
+\fB2.1. Generic operations\fP
+2.1.1. Read and write
+2.1.2. Input/output control
+2.1.3. Non-blocking and asynchronous operations
+.LP
+.if t .sp .5v
+.nf
+\fB2.2. File system\fP
+2.2.1. Overview
+2.2.2. Naming
+2.2.3. Creation and removal
+2.2.3.1. Directory creation and removal
+2.2.3.2. File creation
+2.2.3.3. Creating references to devices
+2.2.3.4. Portal creation
+2.2.3.5. File, device, and portal removal
+2.2.4. Reading and modifying file attributes
+2.2.5. Links and renaming
+2.2.6. Extension and truncation
+2.2.7. Checking accessibility
+2.2.8. Locking
+2.2.9. Disc quotas
+.LP
+.if t .sp .5v
+.nf
+\fB2.3. Interprocess communication\fP
+2.3.1. Interprocess communication primitives
+2.3.1.1.\0 Communication domains
+2.3.1.2.\0 Socket types and protocols
+2.3.1.3.\0 Socket creation, naming and service establishment
+2.3.1.4.\0 Accepting connections
+2.3.1.5.\0 Making connections
+2.3.1.6.\0 Sending and receiving data
+2.3.1.7.\0 Scatter/gather and exchanging access rights
+2.3.1.8.\0 Using read and write with sockets
+2.3.1.9.\0 Shutting down halves of full-duplex connections
+2.3.1.10.\0 Socket and protocol options
+2.3.2. UNIX domain
+2.3.2.1. Types of sockets
+2.3.2.2. Naming
+2.3.2.3. Access rights transmission
+2.3.3. INTERNET domain
+2.3.3.1. Socket types and protocols
+2.3.3.2. Socket naming
+2.3.3.3. Access rights transmission
+2.3.3.4. Raw access
+.LP
+.if t .sp .5v
+.nf
+\fB2.4. Terminals and devices\fP
+2.4.1. Terminals
+2.4.1.1. Terminal input
+2.4.1.1.1. Input modes
+2.4.1.1.2. Interrupt characters
+2.4.1.1.3. Line editing
+2.4.1.2. Terminal output
+2.4.1.3. Terminal control operations
+2.4.1.4. Terminal hardware support
+2.4.2. Structured devices
+2.4.3. Unstructured devices
+.LP
+.if t .sp .5v
+.nf
+\fB2.5. Process control and debugging\fP
+.LP
+.if t .sp .5v
+.nf
+\fBI. Summary of facilities\fP
+.LP
+.de sh
+.ds RH \\$1
+.bp
+.NH \\*(ss
+\s+2\\$1\s0
+.PP
+.PP
+..
+.bp
+.ds ss 1
+.de _d
+.if t .ta .6i 2.1i 2.6i
+.\" 2.94 went to 2.6, 3.64 to 3.30
+.if n .ta .84i 2.6i 3.30i
+..
+.de _f
+.if t .ta .5i 1.25i 2.5i 3.5i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i 4.8i
+..
+.nr H1 -1
+.sh "Notation and types
+.PP
+The notation used to describe system calls is a variant of a
+C language call, consisting of a prototype call followed by
+declaration of parameters and results.
+An additional keyword \fBresult\fP, not part of the normal C language,
+is used to indicate which of the declared entities receive results.
+As an example, consider the \fIread\fP call, as described in
+section 2.1:
+.DS
+cc = read(fd, buf, nbytes);
+result int cc; int fd; result char *buf; int nbytes;
+.DE
+The first line shows how the \fIread\fP routine is called, with
+three parameters.
+As shown on the second line, \fIcc\fP is an integer and \fIread\fP also
+returns information in the parameter \fIbuf\fP.
+.PP
+Description of all error conditions arising from each system call
+is not provided here; they appear in the programmer's manual.
+In particular, when accessed from the C language,
+many calls return a characteristic \-1 value
+when an error occurs, returning the error code in the global variable
+\fIerrno\fP.
+Other languages may present errors in different ways.
+.PP
+A number of system standard types are defined in the include file
+.I <sys/types.h>
+and used in the specifications here and in many C programs.
+These include \fBcaddr_t\fP giving a memory address (typically as
+a character pointer),
+\fBoff_t\fP giving a file offset (typically as a long integer),
+and a set of unsigned types \fBu_char\fP, \fBu_short\fP, \fBu_int\fP
+and \fBu_long\fP, shorthand names for \fBunsigned char\fP, \fBunsigned
+short\fP, etc.
diff --git a/share/doc/psd/05.sysman/1.0.t b/share/doc/psd/05.sysman/1.0.t
new file mode 100644
index 0000000..5a465a7
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.0.t
@@ -0,0 +1,56 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.0.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds ss 1
+.sh "Kernel primitives
+.PP
+The facilities available to a UNIX user process are logically
+divided into two parts: kernel facilities directly implemented by
+UNIX code running in the operating system, and system facilities
+implemented either by the system, or in cooperation with a
+\fIserver process\fP. The kernel facilities are described in
+this section (section 1).
+.PP
+The facilities implemented in the kernel are those which define the
+\fIUNIX virtual machine\fP in which each process runs.
+Like many real machines, this virtual machine has memory management hardware,
+an interrupt facility, timers and counters. The UNIX
+virtual machine also allows access to files and other objects through a set of
+\fIdescriptors\fP. Each descriptor resembles a device controller,
+and supports a set of operations. Like devices on real machines, some
+of which are internal to the machine and some of which are external,
+parts of the descriptor machinery are built into the operating system, while
+other parts are often implemented in server processes on other machines.
+The facilities provided through the descriptor machinery are described in
+section 2.
+.ds ss 2
diff --git a/share/doc/psd/05.sysman/1.1.t b/share/doc/psd/05.sysman/1.1.t
new file mode 100644
index 0000000..099a450
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.1.t
@@ -0,0 +1,215 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.1.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Processes and protection
+.NH 3
+Host and process identifiers
+.PP
+Each UNIX host has associated with it a 32-bit host id, and a host
+name of up to 64 characters (as defined by MAXHOSTNAMELEN in
+\fI<sys/param.h>\fP).
+These are set (by a privileged user)
+and returned by the calls:
+.DS
+sethostid(hostid)
+long hostid;
+
+hostid = gethostid();
+result long hostid;
+
+sethostname(name, len)
+char *name; int len;
+
+len = gethostname(buf, buflen)
+result int len; result char *buf; int buflen;
+.DE
+On each host runs a set of \fIprocesses\fP.
+Each process is largely independent of other processes,
+having its own protection domain, address space, timers, and
+an independent set of references to system or user implemented objects.
+.PP
+Each process in a host is named by an integer
+called the \fIprocess id\fP. This number is
+in the range 1-30000
+and is returned by
+the \fIgetpid\fP routine:
+.DS
+pid = getpid();
+result int pid;
+.DE
+On each UNIX host this identifier is guaranteed to be unique;
+in a multi-host environment, the (hostid, process id) pairs are
+guaranteed unique.
+.NH 3
+Process creation and termination
+.PP
+A new process is created by making a logical duplicate of an
+existing process:
+.DS
+pid = fork();
+result int pid;
+.DE
+The \fIfork\fP call returns twice, once in the parent process, where
+\fIpid\fP is the process identifier of the child,
+and once in the child process where \fIpid\fP is 0.
+The parent-child relationship induces a hierarchical structure on
+the set of processes in the system.
+.PP
+A process may terminate by executing an \fIexit\fP call:
+.DS
+exit(status)
+int status;
+.DE
+returning 8 bits of exit status to its parent.
+.PP
+When a child process exits or
+terminates abnormally, the parent process receives
+information about any
+event which caused termination of the child process. A
+second call provides a non-blocking interface and may also be used
+to retrieve information about resources consumed by the process during its
+lifetime.
+.DS
+#include <sys/wait.h>
+
+pid = wait(astatus);
+result int pid; result union wait *astatus;
+
+pid = wait3(astatus, options, arusage);
+result int pid; result union wait *astatus;
+int options; result struct rusage *arusage;
+.DE
+.PP
+A process can overlay itself with the memory image of another process,
+passing the newly created process a set of parameters, using the call:
+.DS
+execve(name, argv, envp)
+char *name, **argv, **envp;
+.DE
+The specified \fIname\fP must be a file which is in a format recognized
+by the system, either a binary executable file or a file which causes
+the execution of a specified interpreter program to process its contents.
+.NH 3
+User and group ids
+.PP
+Each process in the system has associated with it two user-id's:
+a \fIreal user id\fP and an \fIeffective user id\fP, both 16 bit
+unsigned integers (type \fBuid_t\fP).
+Each process has a \fIreal accounting group id\fP and an \fIeffective
+accounting group id\fP and a set of
+\fIaccess group id's\fP. The group id's are 16 bit unsigned integers
+(type \fBgid_t\fP).
+Each process may be in several different access groups, with the maximum
+concurrent number of access groups a system compilation parameter,
+the constant NGROUPS in the file \fI<sys/param.h>\fP,
+guaranteed to be at least 8.
+.PP
+The real and effective user ids associated with a process are returned by:
+.DS
+ruid = getuid();
+result uid_t ruid;
+
+euid = geteuid();
+result uid_t euid;
+.DE
+the real and effective accounting group ids by:
+.DS
+rgid = getgid();
+result gid_t rgid;
+
+egid = getegid();
+result gid_t egid;
+.DE
+The access group id set is returned by a \fIgetgroups\fP call*:
+.DS
+ngroups = getgroups(gidsetsize, gidset);
+result int ngroups; int gidsetsize; result int gidset[gidsetsize];
+.DE
+.FS
+* The type of the gidset array in getgroups and setgroups
+remains integer for compatibility with 4.2BSD.
+It may change to \fBgid_t\fP in future releases.
+.FE
+.PP
+The user and group id's
+are assigned at login time using the \fIsetreuid\fP, \fIsetregid\fP,
+and \fIsetgroups\fP calls:
+.DS
+setreuid(ruid, euid);
+int ruid, euid;
+
+setregid(rgid, egid);
+int rgid, egid;
+
+setgroups(gidsetsize, gidset)
+int gidsetsize; int gidset[gidsetsize];
+.DE
+The \fIsetreuid\fP call sets both the real and effective user-id's,
+while the \fIsetregid\fP call sets both the real
+and effective accounting group id's.
+Unless the caller is the super-user, \fIruid\fP
+must be equal to either the current real or effective user-id,
+and \fIrgid\fP equal to either the current real or effective
+accounting group id. The \fIsetgroups\fP call is restricted
+to the super-user.
+.NH 3
+Process groups
+.PP
+Each process in the system is also normally associated with a \fIprocess
+group\fP. The group of processes in a process group is sometimes
+referred to as a \fIjob\fP and manipulated by high-level system
+software (such as the shell).
+The current process group of a process is returned by the
+\fIgetpgrp\fP call:
+.DS
+pgrp = getpgrp(pid);
+result int pgrp; int pid;
+.DE
+When a process is in a specific process group it may receive
+software interrupts affecting the group, causing the group to
+suspend or resume execution or to be interrupted or terminated.
+In particular, a system terminal has a process group and only processes
+which are in the process group of the terminal may read from the
+terminal, allowing arbitration of terminals among several different jobs.
+.PP
+The process group associated with a process may be changed by
+the \fIsetpgrp\fP call:
+.DS
+setpgrp(pid, pgrp);
+int pid, pgrp;
+.DE
+Newly created processes are assigned process id's distinct from all
+processes and process groups, and the same process group as their
+parent. A normal (unprivileged) process may set its process group equal
+to its process id. A privileged process may set the process group of any
+process to any value.
diff --git a/share/doc/psd/05.sysman/1.2.t b/share/doc/psd/05.sysman/1.2.t
new file mode 100644
index 0000000..c0a3b17
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.2.t
@@ -0,0 +1,272 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.2.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Memory management\(dg
+.NH 3
+Text, data and stack
+.PP
+.FS
+\(dg This section represents the interface planned for later
+releases of the system. Of the calls described in this section,
+only \fIsbrk\fP and \fIgetpagesize\fP are included in 4.3BSD.
+.FE
+Each process begins execution with three logical areas of memory
+called text, data and stack.
+The text area is read-only and shared, while the data and stack
+areas are private to the process. Both the data and stack areas may
+be extended and contracted on program request. The call
+.DS
+addr = sbrk(incr);
+result caddr_t addr; int incr;
+.DE
+changes the size of the data area by \fIincr\fP bytes and
+returns the new end of the data area, while
+.DS
+addr = sstk(incr);
+result caddr_t addr; int incr;
+.DE
+changes the size of the stack area.
+The stack area is also automatically extended as needed.
+On the VAX the text and data areas are adjacent in the P0 region,
+while the stack section is in the P1 region, and grows downward.
+.NH 3
+Mapping pages
+.PP
+The system supports sharing of data between processes
+by allowing pages to be mapped into memory. These mapped
+pages may be \fIshared\fP with other processes or \fIprivate\fP
+to the process.
+Protection and sharing options are defined in \fI<sys/mman.h>\fP as:
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* protections are chosen from these bits, or-ed together */
+#define PROT_READ 0x04 /* pages can be read */
+#define PROT_WRITE 0x02 /* pages can be written */
+#define PROT_EXEC 0x01 /* pages can be executed */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* flags contain mapping type, sharing type and options */
+/* mapping type; choose one */
+#define MAP_FILE 0x0001 /* mapped from a file or device */
+#define MAP_ANON 0x0002 /* allocated from memory, swap space */
+#define MAP_TYPE 0x000f /* mask for type field */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* sharing types; choose one */
+#define MAP_SHARED 0x0010 /* share changes */
+#define MAP_PRIVATE 0x0000 /* changes are private */
+.DE
+.DS
+.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
+/* other flags */
+#define MAP_FIXED 0x0020 /* map addr must be exactly as requested */
+#define MAP_INHERIT 0x0040 /* region is retained after exec */
+#define MAP_HASSEMAPHORE 0x0080 /* region may contain semaphores */
+#define MAP_NOPREALLOC 0x0100 /* do not preallocate space */
+.DE
+The cpu-dependent size of a page is returned by the
+\fIgetpagesize\fP system call:
+.DS
+pagesize = getpagesize();
+result int pagesize;
+.DE
+.LP
+The call:
+.DS
+maddr = mmap(addr, len, prot, flags, fd, pos);
+result caddr_t maddr; caddr_t addr; int *len, prot, flags, fd; off_t pos;
+.DE
+causes the pages starting at \fIaddr\fP and continuing
+for at most \fIlen\fP bytes to be mapped from the object represented by
+descriptor \fIfd\fP, starting at byte offset \fIpos\fP.
+The starting address of the region is returned;
+for the convenience of the system,
+it may differ from that supplied
+unless the MAP_FIXED flag is given,
+in which case the exact address will be used or the call will fail.
+The actual amount mapped is returned in \fIlen\fP.
+The \fIaddr\fP, \fIlen\fP, and \fIpos\fP parameters
+must all be multiples of the pagesize.
+A successful \fImmap\fP will delete any previous mapping
+in the allocated address range.
+The parameter \fIprot\fP specifies the accessibility
+of the mapped pages.
+The parameter \fIflags\fP specifies
+the type of object to be mapped,
+mapping options, and
+whether modifications made to
+this mapped copy of the page
+are to be kept \fIprivate\fP, or are to be \fIshared\fP with
+other references.
+Possible types include MAP_FILE,
+mapping a regular file or character-special device memory,
+and MAP_ANON, which maps memory not associated with any specific file.
+The file descriptor used for creating MAP_ANON regions is used only
+for naming, and may be given as \-1 if no name
+is associated with the region.\(dd
+.FS
+\(dd The current design does not allow a process
+to specify the location of swap space.
+In the future we may define an additional mapping type, MAP_SWAP,
+in which the file descriptor argument specifies a file
+or device to which swapping should be done.
+.FE
+The MAP_INHERIT flag allows a region to be inherited after an \fIexec\fP.
+The MAP_HASSEMAPHORE flag allows special handling for
+regions that may contain semaphores.
+The MAP_NOPREALLOC flag allows processes to allocate regions whose
+virtual address space, if fully allocated,
+would exceed the available memory plus swap resources.
+Such regions may get a SIGSEGV signal if they page fault and resources
+are not available to service their request;
+typically they would free up some resources via \fImunmap\fP so that
+when they return from the signal the page
+fault could be successfully completed.
+.PP
+A facility is provided to synchronize a mapped region with the file
+it maps; the call
+.DS
+msync(addr, len);
+caddr_t addr; int len;
+.DE
+writes any modified pages back to the filesystem and updates
+the file modification time.
+If \fIlen\fP is 0, all modified pages within the region containing \fIaddr\fP
+will be flushed;
+if \fIlen\fP is non-zero, only the pages containing \fIaddr\fP and \fIlen\fP
+succeeding locations will be examined.
+Any required synchronization of memory caches
+will also take place at this time.
+Filesystem operations on a file that is mapped for shared modifications
+are unpredictable except after an \fImsync\fP.
+.PP
+A mapping can be removed by the call
+.DS
+munmap(addr, len);
+caddr_t addr; int len;
+.DE
+This call deletes the mappings for the specified address range,
+and causes further references to addresses within the range
+to generate invalid memory references.
+.NH 3
+Page protection control
+.PP
+A process can control the protection of pages using the call
+.DS
+mprotect(addr, len, prot);
+caddr_t addr; int len, prot;
+.DE
+This call changes the specified pages to have protection \fIprot\fP\|.
+Not all implementations will guarantee protection on a page basis;
+the granularity of protection changes may be as large as an entire region.
+.NH 3
+Giving and getting advice
+.PP
+A process that has knowledge of its memory behavior may
+use the \fImadvise\fP call:
+.DS
+madvise(addr, len, behav);
+caddr_t addr; int len, behav;
+.DE
+\fIBehav\fP describes expected behavior, as given
+in \fI<sys/mman.h>\fP:
+.DS
+.ta \w'#define\ \ 'u +\w'MADV_SEQUENTIAL\ \ 'u +\w'00\ \ \ \ 'u
+#define MADV_NORMAL 0 /* no further special treatment */
+#define MADV_RANDOM 1 /* expect random page references */
+#define MADV_SEQUENTIAL 2 /* expect sequential references */
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED 4 /* don't need these pages */
+#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */
+.DE
+Finally, a process may obtain information about whether pages are
+core resident by using the call
+.DS
+mincore(addr, len, vec)
+caddr_t addr; int len; result char *vec;
+.DE
+Here the current core residency of the pages is returned
+in the character array \fIvec\fP, with a value of 1 meaning
+that the page is in-core.
+.NH 3
+Synchronization primitives
+.PP
+Primitives are provided for synchronization using semaphores in shared memory.
+Semaphores must lie within a MAP_SHARED region with at least modes
+PROT_READ and PROT_WRITE.
+The MAP_HASSEMAPHORE flag must have been specified when the region was created.
+To acquire a lock a process calls:
+.DS
+value = mset(sem, wait)
+result int value; semaphore *sem; int wait;
+.DE
+\fIMset\fP indivisibly tests and sets the semaphore \fIsem\fP.
+If the previous value is zero, the process has acquired the lock
+and \fImset\fP returns true immediately.
+Otherwise, if the \fIwait\fP flag is zero,
+failure is returned.
+If \fIwait\fP is true and the previous value is non-zero,
+\fImset\fP relinquishes the processor until notified that it should retry.
+.LP
+To release a lock a process calls:
+.DS
+mclear(sem)
+semaphore *sem;
+.DE
+\fIMclear\fP indivisibly tests and clears the semaphore \fIsem\fP.
+If the ``WANT'' flag is zero in the previous value,
+\fImclear\fP returns immediately.
+If the ``WANT'' flag is non-zero in the previous value,
+\fImclear\fP arranges for waiting processes to retry before returning.
+.PP
+Two routines provide services analogous to the kernel
+\fIsleep\fP and \fIwakeup\fP functions interpreted in the domain of
+shared memory.
+A process may relinquish the processor by calling \fImsleep\fP
+with a set semaphore:
+.DS
+msleep(sem)
+semaphore *sem;
+.DE
+If the semaphore is still set when it is checked by the kernel,
+the process will be put in a sleeping state
+until some other process issues an \fImwakeup\fP for the same semaphore
+within the region using the call:
+.DS
+mwakeup(sem)
+semaphore *sem;
+.DE
+An \fImwakeup\fP may awaken all sleepers on the semaphore,
+or may awaken only the next sleeper on a queue.
diff --git a/share/doc/psd/05.sysman/1.3.t b/share/doc/psd/05.sysman/1.3.t
new file mode 100644
index 0000000..f81a185
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.3.t
@@ -0,0 +1,254 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.3.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Signals
+.PP
+.NH 3
+Overview
+.PP
+The system defines a set of \fIsignals\fP that may be delivered
+to a process. Signal delivery resembles the occurrence of a hardware
+interrupt: the signal is blocked from further occurrence,
+the current process context is saved, and a new one
+is built. A process may specify
+the \fIhandler\fP to which a signal is delivered, or specify that
+the signal is to be \fIblocked\fP or \fIignored\fP. A process may
+also specify that a
+\fIdefault\fP action is to be taken when signals occur.
+.PP
+Some signals
+will cause a process to exit when they are not caught. This
+may be accompanied by creation of a \fIcore\fP image file, containing
+the current memory image of the process for use in post-mortem debugging.
+A process may choose to have signals delivered on a special
+stack, so that sophisticated software stack manipulations are possible.
+.PP
+All signals have the same \fIpriority\fP. If multiple signals
+are pending simultaneously, the order in which they are delivered
+to a process is implementation specific. Signal routines execute
+with the signal that caused their invocation \fIblocked\fP, but other
+signals may yet occur. Mechanisms are provided whereby critical sections
+of code may protect themselves against the occurrence of specified signals.
+.NH 3
+Signal types
+.PP
+The signals defined by the system fall into one of
+five classes: hardware conditions,
+software conditions, input/output notification, process control, or
+resource control.
+The set of signals is defined in the file \fI<signal.h>\fP.
+.PP
+Hardware signals are derived from exceptional conditions which
+may occur during
+execution. Such signals include SIGFPE representing floating
+point and other arithmetic exceptions, SIGILL for illegal instruction
+execution, SIGSEGV for addresses outside the currently assigned
+area of memory, and SIGBUS for accesses that violate memory
+protection constraints.
+Other, more cpu-specific hardware signals exist,
+such as those for the various customer-reserved instructions on
+the VAX (SIGIOT, SIGEMT, and SIGTRAP).
+.PP
+Software signals reflect interrupts generated by user request:
+SIGINT for the normal interrupt signal; SIGQUIT for the more
+powerful \fIquit\fP signal, that normally causes a core image
+to be generated; SIGHUP and SIGTERM that cause graceful
+process termination, either because a user has ``hung up'', or
+by user or program request; and SIGKILL, a more powerful termination
+signal which a process cannot catch or ignore.
+Programs may define their own asynchronous events using SIGUSR1
+and SIGUSR2.
+Other software signals (SIGALRM, SIGVTALRM, SIGPROF)
+indicate the expiration of interval timers.
+.PP
+A process can request notification via a SIGIO signal
+when input or output is possible
+on a descriptor, or when a \fInon-blocking\fP operation completes.
+A process may request to receive a SIGURG signal when an
+urgent condition arises.
+.PP
+A process may be \fIstopped\fP by a signal sent to it or the members
+of its process group. The SIGSTOP signal is a powerful stop
+signal, because it cannot be caught. Other stop signals,
+SIGTSTP, SIGTTIN, and SIGTTOU, are used when a user request, input
+request, or output request, respectively, is the reason for stopping the process.
+A SIGCONT signal is sent to a process when it is
+continued from a stopped state.
+Processes may receive notification with a SIGCHLD signal when
+a child process changes state, either by stopping or by terminating.
+.PP
+Exceeding resource limits may cause signals to be generated.
+SIGXCPU occurs when a process nears its CPU time limit and SIGXFSZ
+warns that the limit on file size creation has been reached.
+.NH 3
+Signal handlers
+.PP
+A process has a handler associated with each signal.
+The handler controls the way the signal is delivered.
+The call
+.DS
+#include <signal.h>
+
+._f
+struct sigvec {
+ int (*sv_handler)();
+ int sv_mask;
+ int sv_flags;
+};
+
+sigvec(signo, sv, osv)
+int signo; struct sigvec *sv; result struct sigvec *osv;
+.DE
+assigns interrupt handler address \fIsv_handler\fP to signal \fIsigno\fP.
+Each handler address
+specifies either an interrupt routine for the signal, that the
+signal is to be ignored,
+or that a default action (usually process termination) is to occur
+if the signal occurs.
+The constants
+SIG_IGN and SIG_DFL used as values for \fIsv_handler\fP
+cause ignoring or defaulting of a condition.
+The \fIsv_mask\fP value specifies the
+signal mask to be used when the handler is invoked; it implicitly includes
+the signal which invoked the handler.
+Signal masks include one bit for each signal;
+the mask for a signal \fIsigno\fP is provided by the macro
+\fIsigmask\fP(\fIsigno\fP), from \fI<signal.h>\fP.
+\fISv_flags\fP specifies whether system calls should be
+restarted if the signal handler returns and
+whether the handler should operate on the normal run-time
+stack or a special signal stack (see below). If \fIosv\fP
+is non-zero, the previous signal vector is returned.
+.PP
+When a signal condition arises for a process, the signal
+is added to a set of signals pending for the process.
+If the signal is not currently \fIblocked\fP by the process
+then it will be delivered. The process of signal delivery
+adds the signal to be delivered and those signals
+specified in the associated signal
+handler's \fIsv_mask\fP to a set of those \fImasked\fP
+for the process, saves the current process context,
+and places the process in the context of the signal
+handling routine. The call is arranged so that if the signal
+handling routine exits normally the signal mask will be restored
+and the process will resume execution in the original context.
+If the process wishes to resume in a different context, then
+it must arrange to restore the signal mask itself.
+.PP
+The mask of \fIblocked\fP signals is independent of handlers for
+signals. It delays signals from being delivered much as a
+raised hardware interrupt priority level delays hardware interrupts.
+Preventing an interrupt from occurring by changing the handler is analogous to
+disabling a device from further interrupts.
+.PP
+The signal handling routine \fIsv_handler\fP is called by a C call
+of the form
+.DS
+(*sv_handler)(signo, code, scp);
+int signo; long code; struct sigcontext *scp;
+.DE
+The \fIsigno\fP gives the number of the signal that occurred, and
+the \fIcode\fP, a word of information supplied by the hardware.
+The \fIscp\fP parameter is a pointer to a machine-dependent
+structure containing the information for restoring the
+context before the signal.
+.NH 3
+Sending signals
+.PP
+A process can send a signal to another process or group of processes
+with the calls:
+.DS
+kill(pid, signo)
+int pid, signo;
+
+killpg(pgrp, signo)
+int pgrp, signo;
+.DE
+Unless the process sending the signal is privileged,
+it must have the same effective user id as the process receiving the signal.
+.PP
+Signals are also sent implicitly from a terminal device to the
+process group associated with the terminal when certain input characters
+are typed.
+.NH 3
+Protecting critical sections
+.PP
+To block a section of code against one or more signals, a \fIsigblock\fP
+call may be used to add a set of signals to the existing mask, returning
+the old mask:
+.DS
+oldmask = sigblock(mask);
+result long oldmask; long mask;
+.DE
+The old mask can then be restored later with \fIsigsetmask\fP\|,
+.DS
+oldmask = sigsetmask(mask);
+result long oldmask; long mask;
+.DE
+The \fIsigblock\fP call can be used to read the current mask
+by specifying an empty \fImask\fP\|.
+.PP
+It is possible to check conditions with some signals blocked,
+and then to pause, waiting for a signal with the mask restored, by using:
+.DS
+sigpause(mask);
+long mask;
+.DE
+.NH 3
+Signal stacks
+.PP
+Applications that maintain complex or fixed size stacks can use
+the call
+.DS
+._f
+struct sigstack {
+ caddr_t ss_sp;
+ int ss_onstack;
+};
+
+sigstack(ss, oss)
+struct sigstack *ss; result struct sigstack *oss;
+.DE
+to provide the system with a stack based at \fIss_sp\fP for delivery
+of signals. The value \fIss_onstack\fP indicates whether the
+process is currently on the signal stack,
+a notion maintained in software by the system.
+.PP
+When a signal is to be delivered, the system checks whether
+the process is on a signal stack. If not, then the process is switched
+to the signal stack for delivery, with the return from the signal
+arranged to restore the previous stack.
+.PP
+If the process wishes to take a non-local exit from the signal routine,
+or run code from the signal stack that uses a different stack,
+a \fIsigstack\fP call should be used to reset the signal stack.
diff --git a/share/doc/psd/05.sysman/1.4.t b/share/doc/psd/05.sysman/1.4.t
new file mode 100644
index 0000000..a67a5ce
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.4.t
@@ -0,0 +1,137 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.4.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Timers
+.NH 3
+Real time
+.PP
+The system's notion of the current Greenwich time and the current time
+zone is set and returned by the calls:
+.DS
+#include <sys/time.h>
+
+settimeofday(tp, tzp);
+struct timeval *tp;
+struct timezone *tzp;
+
+gettimeofday(tp, tzp);
+result struct timeval *tp;
+result struct timezone *tzp;
+.DE
+where the structures are defined in \fI<sys/time.h>\fP as:
+.DS
+._f
+struct timeval {
+ long tv_sec; /* seconds since Jan 1, 1970 */
+ long tv_usec; /* and microseconds */
+};
+
+struct timezone {
+ int tz_minuteswest; /* of Greenwich */
+ int tz_dsttime; /* type of dst correction to apply */
+};
+.DE
+The precision of the system clock is hardware dependent.
+Earlier versions of UNIX contained only a 1-second resolution version
+of this call, which remains as a library routine:
+.DS
+time(tvsec)
+result long *tvsec;
+.DE
+returning only the tv_sec field from the \fIgettimeofday\fP call.
+.NH 3
+Interval time
+.PP
+The system provides each process with three interval timers,
+defined in \fI<sys/time.h>\fP:
+.DS
+._d
+#define ITIMER_REAL 0 /* real time intervals */
+#define ITIMER_VIRTUAL 1 /* virtual time intervals */
+#define ITIMER_PROF 2 /* user and system virtual time */
+.DE
+The ITIMER_REAL timer decrements
+in real time. It could be used by a library routine to
+maintain a wakeup service queue. A SIGALRM signal is delivered
+when this timer expires.
+.PP
+The ITIMER_VIRTUAL timer decrements in process virtual time.
+It runs only when the process is executing. A SIGVTALRM signal
+is delivered when it expires.
+.PP
+The ITIMER_PROF timer decrements both in process virtual time and when
+the system is running on behalf of the process.
+It is designed to be used by processes to statistically profile
+their execution.
+A SIGPROF signal is delivered when it expires.
+.PP
+A timer value is defined by the \fIitimerval\fP structure:
+.DS
+._f
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+.DE
+and a timer is set or read by the call:
+.DS
+getitimer(which, value);
+int which; result struct itimerval *value;
+
+setitimer(which, value, ovalue);
+int which; struct itimerval *value; result struct itimerval *ovalue;
+.DE
+The third argument to \fIsetitimer\fP specifies an optional structure
+to receive the previous contents of the interval timer.
+A timer can be disabled by specifying a timer value of 0.
+.PP
+The system rounds argument timer intervals to be not less than the
+resolution of its clock. This clock resolution can be determined
+by loading a very small value into a timer and reading the timer back to
+see what value resulted.
+.PP
+The \fIalarm\fP system call of earlier versions of UNIX is provided
+as a library routine using the ITIMER_REAL timer. The process
+profiling facilities of earlier versions of UNIX
+remain because
+it is not always possible to guarantee
+the automatic restart of system calls after
+receipt of a signal.
+The \fIprofil\fP call arranges for the kernel to begin gathering
+execution statistics for a process:
+.DS
+profil(buf, bufsize, offset, scale);
+result char *buf; int bufsize, offset, scale;
+.DE
+This begins sampling of the program counter, with statistics maintained
+in the user-provided buffer.
diff --git a/share/doc/psd/05.sysman/1.5.t b/share/doc/psd/05.sysman/1.5.t
new file mode 100644
index 0000000..e642e2d
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.5.t
@@ -0,0 +1,225 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.5.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh Descriptors
+.PP
+.NH 3
+The reference table
+.PP
+Each process has access to resources through
+\fIdescriptors\fP. Each descriptor is a handle allowing
+the process to reference objects such as files, devices
+and communications links.
+.PP
+Rather than allowing processes direct access to descriptors, the system
+introduces a level of indirection, so that descriptors may be shared
+between processes. Each process has a \fIdescriptor reference table\fP,
+containing pointers to the actual descriptors. The descriptors
+themselves thus have multiple references, and are reference counted by the
+system.
+.PP
+Each process has a fixed size descriptor reference table, where
+the size is returned by the \fIgetdtablesize\fP call:
+.DS
+nds = getdtablesize();
+result int nds;
+.DE
+and guaranteed to be at least 20. The entries in the descriptor reference
+table are referred to by small integers; for example if there
+are 20 slots they are numbered 0 to 19.
+.NH 3
+Descriptor properties
+.PP
+Each descriptor has a logical set of properties maintained
+by the system and defined by its \fItype\fP.
+Each type supports a set of operations;
+some operations, such as reading and writing, are common to several
+abstractions, while others are unique.
+The generic operations applying to many of these types are described
+in section 2.1. Naming contexts, files and directories are described in
+section 2.2. Section 2.3 describes communications domains and sockets.
+Terminals and (structured and unstructured) devices are described
+in section 2.4.
+.NH 3
+Managing descriptor references
+.PP
+A duplicate of a descriptor reference may be made by doing
+.DS
+new = dup(old);
+result int new; int old;
+.DE
+returning a copy of descriptor reference \fIold\fP indistinguishable from
+the original. The \fInew\fP chosen by the system will be the
+smallest unused descriptor reference slot.
+A copy of a descriptor reference may be made in a specific slot
+by doing
+.DS
+dup2(old, new);
+int old, new;
+.DE
+The \fIdup2\fP call causes the system to deallocate the descriptor reference
+currently occupying slot \fInew\fP, if any, replacing it with a reference
+to the same descriptor as \fIold\fP.
+This deallocation is also performed by:
+.DS
+close(old);
+int old;
+.DE
+.NH 3
+Multiplexing requests
+.PP
+The system provides a
+standard way to do
+synchronous and asynchronous multiplexing of operations.
+.PP
+Synchronous multiplexing is performed by using the \fIselect\fP call
+to examine the state of multiple descriptors simultaneously,
+and to wait for state changes on those descriptors.
+Sets of descriptors of interest are specified as bit masks,
+as follows:
+.DS
+#include <sys/types.h>
+
+nds = select(nd, in, out, except, tvp);
+result int nds; int nd; result fd_set *in, *out, *except;
+struct timeval *tvp;
+
+FD_ZERO(&fdset);
+FD_SET(fd, &fdset);
+FD_CLR(fd, &fdset);
+FD_ISSET(fd, &fdset);
+int fd; fd_set fdset;
+.DE
+The \fIselect\fP call examines the descriptors
+specified by the
+sets \fIin\fP, \fIout\fP and \fIexcept\fP, replacing
+the specified bit masks by the subsets that select true for input,
+output, and exceptional conditions respectively (\fInd\fP
+indicates the number of file descriptors specified by the bit masks).
+If any descriptors meet the following criteria,
+then the number of such descriptors is returned in \fInds\fP and the
+bit masks are updated.
+.if n .ds bu *
+.if t .ds bu \(bu
+.IP \*(bu
+A descriptor selects for input if an input oriented operation
+such as \fIread\fP or \fIreceive\fP is possible, or if a
+connection request may be accepted (see section 2.3.1.4).
+.IP \*(bu
+A descriptor selects for output if an output oriented operation
+such as \fIwrite\fP or \fIsend\fP is possible, or if an operation
+that was ``in progress'', such as connection establishment,
+has completed (see section 2.1.3).
+.IP \*(bu
+A descriptor selects for an exceptional condition if a condition
+that would cause a SIGURG signal to be generated exists (see section 1.3.2),
+or other device-specific events have occurred.
+.LP
+If none of the specified conditions is true, the operation
+waits for one of the conditions to arise,
+blocking at most the amount of time specified by \fItvp\fP.
+If \fItvp\fP is given as 0, the \fIselect\fP waits indefinitely.
+.PP
+Options affecting I/O on a descriptor
+may be read and set by the call:
+.DS
+._d
+dopt = fcntl(d, cmd, arg)
+result int dopt; int d, cmd, arg;
+
+/* interesting values for cmd */
+#define F_SETFL 3 /* set descriptor options */
+#define F_GETFL 4 /* get descriptor options */
+#define F_SETOWN 5 /* set descriptor owner (pid/pgrp) */
+#define F_GETOWN 6 /* get descriptor owner (pid/pgrp) */
+.DE
+The F_SETFL \fIcmd\fP may be used to set a descriptor in
+non-blocking I/O mode and/or enable signaling when I/O is
+possible. F_SETOWN may be used to specify a process or process
+group to be signaled when using the latter mode of operation
+or when urgent indications arise.
+.PP
+Operations on non-blocking descriptors will
+either complete immediately,
+note an error EWOULDBLOCK,
+partially complete an input or output operation returning a partial count,
+or return an error EINPROGRESS noting that the requested operation is
+in progress.
+A descriptor which has signaling enabled will cause the specified process
+and/or process group
+to be signaled, with a SIGIO for input, output, or in-progress
+operation complete, or
+a SIGURG for exceptional conditions.
+.PP
+For example, when writing to a terminal
+using non-blocking output,
+the system will accept only as much data as there is buffer space for
+and return; when making a connection on a \fIsocket\fP, the operation may
+return indicating that the connection establishment is ``in progress''.
+The \fIselect\fP facility can be used to determine when further
+output is possible on the terminal, or when the connection establishment
+attempt is complete.
+.NH 3
+Descriptor wrapping.\(dg
+.PP
+.FS
+\(dg The facilities described in this section are not included
+in 4.3BSD.
+.FE
+A user process may build descriptors of a specified type by
+\fIwrapping\fP a communications channel with a system supplied protocol
+translator:
+.DS
+new = wrap(old, proto)
+result int new; int old; struct dprop *proto;
+.DE
+Operations on the descriptor \fInew\fP are then translated by the
+system provided protocol translator into requests on the underlying
+object \fIold\fP in a way defined by the protocol.
+The protocols supported by the kernel may vary from system to system
+and are described in the programmer's manual.
+.PP
+Protocols may be based on communications multiplexing or a rights-passing
+style of handling multiple requests made on the same object. For instance,
+a protocol for implementing a file abstraction may or may not include
+locally generated ``read-ahead'' requests. A protocol that provides for
+read-ahead may provide higher performance but have a more difficult
+implementation.
+.PP
+Another example is the terminal driving facilities. Normally a terminal
+is associated with a communications line, and the terminal type
+and standard terminal access protocol are wrapped around a synchronous
+communications line and given to the user. If a virtual terminal
+is required, the terminal driver can be wrapped around a communications
+link, the other end of which is held by a virtual terminal protocol
+interpreter.
diff --git a/share/doc/psd/05.sysman/1.6.t b/share/doc/psd/05.sysman/1.6.t
new file mode 100644
index 0000000..109d271
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.6.t
@@ -0,0 +1,135 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.6.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Resource controls
+.NH 3
+Process priorities
+.PP
+The system gives CPU scheduling priority to processes that have not used
+CPU time recently. This tends to favor interactive processes and
+processes that execute only for short periods.
+It is possible to determine the priority currently
+assigned to a process, process group, or the processes of a specified user,
+or to alter this priority using the calls:
+.DS
+._d
+#define PRIO_PROCESS 0 /* process */
+#define PRIO_PGRP 1 /* process group */
+#define PRIO_USER 2 /* user id */
+
+prio = getpriority(which, who);
+result int prio; int which, who;
+
+setpriority(which, who, prio);
+int which, who, prio;
+.DE
+The value \fIprio\fP is in the range \-20 to 20.
+The default priority is 0; lower priorities cause more
+favorable execution.
+The \fIgetpriority\fP call returns the highest priority (lowest numerical value)
+enjoyed by any of the specified processes.
+The \fIsetpriority\fP call sets the priorities of all of the
+specified processes to the specified value.
+Only the super-user may lower priorities.
+.NH 3
+Resource utilization
+.PP
+The resources used by a process are returned by a \fIgetrusage\fP call,
+returning information in a structure defined in \fI<sys/resource.h>\fP:
+.DS
+._d
+#define RUSAGE_SELF 0 /* usage by this process */
+#define RUSAGE_CHILDREN -1 /* usage by all children */
+
+getrusage(who, rusage)
+int who; result struct rusage *rusage;
+
+._f
+struct rusage {
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+ int ru_maxrss; /* maximum core resident set size: kbytes */
+ int ru_ixrss; /* integral shared memory size (kbytes*sec) */
+ int ru_idrss; /* unshared data memory size */
+ int ru_isrss; /* unshared stack memory size */
+ int ru_minflt; /* page-reclaims */
+ int ru_majflt; /* page faults */
+ int ru_nswap; /* swaps */
+ int ru_inblock; /* block input operations */
+ int ru_oublock; /* block output operations */
+ int ru_msgsnd; /* messages sent */
+ int ru_msgrcv; /* messages received */
+ int ru_nsignals; /* signals received */
+ int ru_nvcsw; /* voluntary context switches */
+ int ru_nivcsw; /* involuntary context switches */
+};
+.DE
+The \fIwho\fP parameter specifies whose resource usage is to be returned.
+The resources used by the current process, or by all
+the terminated children of the current process may be requested.
+.NH 3
+Resource limits
+.PP
+The resources of a process for which limits are controlled by the
+kernel are defined in \fI<sys/resource.h>\fP, and controlled by the
+\fIgetrlimit\fP and \fIsetrlimit\fP calls:
+.DS
+._d
+#define RLIMIT_CPU 0 /* cpu time in milliseconds */
+#define RLIMIT_FSIZE 1 /* maximum file size */
+#define RLIMIT_DATA 2 /* maximum data segment size */
+#define RLIMIT_STACK 3 /* maximum stack segment size */
+#define RLIMIT_CORE 4 /* maximum core file size */
+#define RLIMIT_RSS 5 /* maximum resident set size */
+
+#define RLIM_NLIMITS 6
+
+#define RLIM_INFINITY 0x7f\&f\&f\&f\&f\&f\&f
+
+._f
+struct rlimit {
+ int rlim_cur; /* current (soft) limit */
+ int rlim_max; /* hard limit */
+};
+
+getrlimit(resource, rlp)
+int resource; result struct rlimit *rlp;
+
+setrlimit(resource, rlp)
+int resource; struct rlimit *rlp;
+.DE
+.PP
+Only the super-user can raise the maximum limits.
+Other users may only
+alter \fIrlim_cur\fP within the range from 0 to \fIrlim_max\fP
+or (irreversibly) lower \fIrlim_max\fP.
diff --git a/share/doc/psd/05.sysman/1.7.t b/share/doc/psd/05.sysman/1.7.t
new file mode 100644
index 0000000..09e1a02
--- /dev/null
+++ b/share/doc/psd/05.sysman/1.7.t
@@ -0,0 +1,100 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.7.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "System operation support
+.PP
+Unless noted otherwise,
+the calls in this section are permitted only to a privileged user.
+.NH 3
+Bootstrap operations
+.PP
+The call
+.DS
+mount(blkdev, dir, ronly);
+char *blkdev, *dir; int ronly;
+.DE
+extends the UNIX name space. The \fImount\fP call specifies
+a block device \fIblkdev\fP containing a UNIX file system
+to be made available starting at \fIdir\fP. If \fIronly\fP is
+set then the file system is read-only; writes to the file system
+will not be permitted and access times will not be updated
+when files are referenced.
+\fIDir\fP is normally a name in the root directory.
+.PP
+The call
+.DS
+swapon(blkdev, size);
+char *blkdev; int size;
+.DE
+specifies a device to be made available for paging and swapping.
+.PP
+.NH 3
+Shutdown operations
+.PP
+The call
+.DS
+unmount(dir);
+char *dir;
+.DE
+unmounts the file system mounted on \fIdir\fP.
+This call will succeed only if the file system is
+not currently being used.
+.PP
+The call
+.DS
+sync();
+.DE
+schedules input/output to clean all system buffer caches.
+(This call does not require privileged status.)
+.PP
+The call
+.DS
+reboot(how)
+int how;
+.DE
+causes a machine halt or reboot. The call may request a reboot
+by specifying \fIhow\fP as RB_AUTOBOOT, or that the machine be halted
+with RB_HALT. These constants are defined in \fI<sys/reboot.h>\fP.
+.NH 3
+Accounting
+.PP
+The system optionally keeps an accounting record in a file
+for each process that exits on the system.
+The format of this record is beyond the scope of this document.
+The accounting may be enabled to a file \fIpath\fP by doing
+.DS
+acct(path);
+char *path;
+.DE
+If \fIpath\fP is null, then accounting is disabled. Otherwise,
+the named file becomes the accounting file.
diff --git a/share/doc/psd/05.sysman/2.0.t b/share/doc/psd/05.sysman/2.0.t
new file mode 100644
index 0000000..ca44bc2
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.0.t
@@ -0,0 +1,83 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.0.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds ss 1
+.sh "System facilities
+This section discusses the system facilities that
+are not considered part of the kernel.
+.PP
+The system abstractions described are:
+.IP "Directory contexts
+.br
+A directory context is a position in the UNIX file system name
+space. Operations on files and other named objects in a file system are
+always specified relative to such a context.
+.IP "Files
+.br
+Files are used to store uninterpreted sequences of bytes on which
+random access \fIreads\fP and \fIwrites\fP may occur.
+Pages from files may also be mapped into process address space.\(dg
+A directory may be read as a file.
+.FS
+\(dg Support for mapping files is not included in the 4.3 release.
+.FE
+.IP "Communications domains
+.br
+A communications domain represents
+an interprocess communications environment, such as the communications
+facilities of the UNIX system,
+communications in the INTERNET, or the resource sharing protocols
+and access rights of a resource sharing system on a local network.
+.IP "Sockets
+.br
+A socket is an endpoint of communication and the focal
+point for IPC in a communications domain. Sockets may be created in pairs,
+or given names and used to rendezvous with other sockets
+in a communications domain, accepting connections from these
+sockets or exchanging messages with them. These operations model
+a labeled or unlabeled communications graph, and can be used in a
+wide variety of communications domains. Sockets can have different
+\fItypes\fP\| to provide different semantics of communication,
+increasing the flexibility of the model.
+.IP "Terminals and other devices
+.br
+Devices include
+terminals, providing input editing and interrupt generation
+and output flow control and editing, magnetic tapes,
+disks and other peripherals. They often support the generic
+\fIread\fP and \fIwrite\fP operations as well as a number of \fIioctl\fP\|s.
+.IP "Processes
+.br
+Process descriptors provide facilities for control and debugging of
+other processes.
+.ds ss 2
diff --git a/share/doc/psd/05.sysman/2.1.t b/share/doc/psd/05.sysman/2.1.t
new file mode 100644
index 0000000..ef25887
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.1.t
@@ -0,0 +1,138 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.1.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Generic operations
+.PP
+.PP
+Many system abstractions support the
+operations \fIread\fP, \fIwrite\fP and \fIioctl\fP. We describe
+the basics of these common primitives here.
+Similarly, the mechanisms whereby normally synchronous operations
+may occur in a non-blocking or asynchronous fashion are
+common to all system-defined abstractions and are described here.
+.NH 3
+Read and write
+.PP
+The \fIread\fP and \fIwrite\fP system calls can be applied
+to communications channels, files, terminals and devices.
+They have the form:
+.DS
+cc = read(fd, buf, nbytes);
+result int cc; int fd; result caddr_t buf; int nbytes;
+
+cc = write(fd, buf, nbytes);
+result int cc; int fd; caddr_t buf; int nbytes;
+.DE
+The \fIread\fP call transfers as much data as possible from the
+object defined by \fIfd\fP to the buffer at address \fIbuf\fP of
+size \fInbytes\fP. The number of bytes transferred is
+returned in \fIcc\fP, which is \-1 if a return occurred before
+any data was transferred because of an error or use of non-blocking
+operations.
+.PP
+The \fIwrite\fP call transfers data from the buffer to the
+object defined by \fIfd\fP. Depending on the type of \fIfd\fP,
+it is possible that the \fIwrite\fP call will accept some portion
+of the provided bytes; the user should resubmit the other bytes
+in a later request in this case.
+Error returns because of interrupted or otherwise incomplete operations
+are possible.
+.PP
+Scattering of data on input or gathering of data for output
+is also possible using an array of input/output vector descriptors.
+The type for the descriptors is defined in \fI<sys/uio.h>\fP as:
+.DS
+._f
+struct iovec {
+ caddr_t iov_base; /* base of a component */
+ int iov_len; /* length of a component */
+};
+.DE
+The calls using an array of descriptors are:
+.DS
+cc = readv(fd, iov, iovlen);
+result int cc; int fd; struct iovec *iov; int iovlen;
+
+cc = writev(fd, iov, iovlen);
+result int cc; int fd; struct iovec *iov; int iovlen;
+.DE
+Here \fIiovlen\fP is the count of elements in the \fIiov\fP array.
+.NH 3
+Input/output control
+.PP
+Control operations on an object are performed by the \fIioctl\fP
+operation:
+.DS
+ioctl(fd, request, buffer);
+int fd, request; caddr_t buffer;
+.DE
+This operation causes the specified \fIrequest\fP to be performed
+on the object \fIfd\fP. The \fIrequest\fP parameter specifies
+whether the argument buffer is to be read, written, read and written,
+or is not needed, and also the size of the buffer, as well as the
+request.
+Different descriptor types and subtypes within descriptor types
+may use distinct \fIioctl\fP requests. For example,
+operations on terminals control flushing of input and output
+queues and setting of terminal parameters; operations on
+disks cause formatting operations to occur; operations on tapes
+control tape positioning.
+.PP
+The names for basic control operations are defined in \fI<sys/ioctl.h>\fP.
+.NH 3
+Non-blocking and asynchronous operations
+.PP
+A process that wishes to do non-blocking operations on one of
+its descriptors sets the descriptor in non-blocking mode as
+described in section 1.5.4. Thereafter the \fIread\fP call will
+return a specific EWOULDBLOCK error indication if there is no data to be
+\fIread\fP. The process may
+\fIselect\fP the associated descriptor to determine when a read is
+possible.
+.PP
+Output attempted when a descriptor can accept less than is requested
+will either accept some of the provided data, returning a shorter than normal
+length, or return an error indicating that the operation would block.
+More output can be performed as soon as a \fIselect\fP call indicates
+the object is writeable.
+.PP
+Operations other than data input or output
+may be performed on a descriptor in a non-blocking fashion.
+These operations will return with a characteristic error indicating
+that they are in progress
+if they cannot complete immediately. The descriptor
+may then be \fIselect\fPed for \fIwrite\fP to find out
+when the operation has been completed. When \fIselect\fP indicates
+the descriptor is writeable, the operation has completed.
+Depending on the nature of the descriptor and the operation,
+additional activity may be started or the new state may be tested.
diff --git a/share/doc/psd/05.sysman/2.2.t b/share/doc/psd/05.sysman/2.2.t
new file mode 100644
index 0000000..996e9b5
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.2.t
@@ -0,0 +1,470 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.2.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "File system
+.NH 3
+Overview
+.PP
+The file system abstraction provides access to a hierarchical
+file system structure.
+The file system contains directories (each of which may contain
+other sub-directories) as well as files and references to other
+objects such as devices and inter-process communications sockets.
+.PP
+Each file is organized as a linear array of bytes. No record
+boundaries or system related information is present in
+a file.
+Files may be read and written in a random-access fashion.
+The user may read the data in a directory as though
+it were an ordinary file to determine the names of the contained files,
+but only the system may write into the directories.
+The file system stores only a small amount of ownership, protection and usage
+information with a file.
+.NH 3
+Naming
+.PP
+The file system calls take \fIpath name\fP arguments.
+These consist of zero or more component \fIfile names\fP
+separated by ``/\^'' characters, where each file name
+is up to 255 ASCII characters excluding null and ``/\^''.
+.PP
+Each process always has two naming contexts: one for the
+root directory of the file system and one for the
+current working directory. These are used
+by the system in the filename translation process.
+If a path name begins with a ``/\^'', it is called
+a full path name and interpreted relative to the root directory context.
+If the path name does not begin with a ``/\^'' it is called
+a relative path name and interpreted relative to the current directory
+context.
+.PP
+The system limits
+the total length of a path name to 1024 characters.
+.PP
+The file name ``..'' in each directory refers to
+the parent directory of that directory.
+The parent directory of the root of the file system is always that directory.
+.PP
+The calls
+.DS
+chdir(path);
+char *path;
+
+chroot(path);
+char *path;
+.DE
+change the current working directory and root directory context of a process.
+Only the super-user can change the root directory context of a process.
+.NH 3
+Creation and removal
+.PP
+The file system allows directories, files, special devices,
+and ``portals'' to be created and removed from the file system.
+.NH 4
+Directory creation and removal
+.PP
+A directory is created with the \fImkdir\fP system call:
+.DS
+mkdir(path, mode);
+char *path; int mode;
+.DE
+where the mode is defined as for files (see below).
+Directories are removed with the \fIrmdir\fP system call:
+.DS
+rmdir(path);
+char *path;
+.DE
+A directory must be empty if it is to be deleted.
+.NH 4
+File creation
+.PP
+Files are created with the \fIopen\fP system call,
+.DS
+fd = open(path, oflag, mode);
+result int fd; char *path; int oflag, mode;
+.DE
+The \fIpath\fP parameter specifies the name of the
+file to be created. The \fIoflag\fP parameter must
+include O_CREAT from below to cause the file to be created.
+Bits for \fIoflag\fP are
+defined in \fI<sys/file.h>\fP:
+.DS
+._d
+#define O_RDONLY 000 /* open for reading */
+#define O_WRONLY 001 /* open for writing */
+#define O_RDWR 002 /* open for read & write */
+#define O_NDELAY 004 /* non-blocking open */
+#define O_APPEND 010 /* append on each write */
+#define O_CREAT 01000 /* open with file create */
+#define O_TRUNC 02000 /* open with truncation */
+#define O_EXCL 04000 /* error on create if file exists */
+.DE
+.PP
+One of O_RDONLY, O_WRONLY and O_RDWR should be specified,
+indicating what types of operations are desired to be performed
+on the open file. The operations will be checked against the user's
+access rights to the file before allowing the \fIopen\fP to succeed.
+Specifying O_APPEND causes writes to automatically append to the
+file.
+The flag O_CREAT causes the file to be created if it does not
+exist, owned by the current user
+and the group of the containing directory.
+The protection for the new file is specified in \fImode\fP.
+The file mode is used as a three digit octal number.
+Each digit encodes read access as 4, write access as 2 and execute
+access as 1, or'ed together. The 0700 bits describe owner
+access, the 070 bits describe the access rights for processes in the same
+group as the file, and the 07 bits describe the access rights
+for other processes.
+.PP
+If the open specifies to create the file with O_EXCL
+and the file already exists, then the \fIopen\fP will fail
+without affecting the file in any way. This provides a
+simple exclusive access facility.
+If the file exists but is a symbolic link, the open will fail
+regardless of the existence of the file specified by the link.
+.NH 4
+Creating references to devices
+.PP
+The file system allows entries which reference peripheral devices.
+Peripherals are distinguished as \fIblock\fP or \fIcharacter\fP
+devices according to their ability to support block-oriented
+operations.
+Devices are identified by their ``major'' and ``minor''
+device numbers. The major device number determines the kind
+of peripheral it is, while the minor device number indicates
+one of possibly many peripherals of that kind.
+Structured devices have all operations performed internally
+in ``block'' quantities while
+unstructured devices often have a number of
+special \fIioctl\fP operations, and may have input and output
+performed in varying units.
+The \fImknod\fP call creates special entries:
+.DS
+mknod(path, mode, dev);
+char *path; int mode, dev;
+.DE
+where \fImode\fP is formed from the object type
+and access permissions. The parameter \fIdev\fP is a configuration
+dependent parameter used to identify specific character or
+block I/O devices.
+.NH 4
+Portal creation\(dg
+.PP
+.FS
+\(dg The \fIportal\fP call is not implemented in 4.3BSD.
+.FE
+The call
+.DS
+fd = portal(name, server, param, dtype, protocol, domain, socktype)
+result int fd; char *name, *server, *param; int dtype, protocol;
+int domain, socktype;
+.DE
+places a \fIname\fP in the file system name space that causes connection to a
+server process when the name is used.
+The portal call returns an active portal in \fIfd\fP as though an
+access had occurred to activate an inactive portal, as now described.
+.PP
+When an inactive portal is accessed, the system sets up a socket
+of the specified \fIsocktype\fP in the specified communications
+\fIdomain\fP (see section 2.3), and creates the \fIserver\fP process,
+giving it the specified \fIparam\fP as argument to help it identify
+the portal, and also giving it the newly created socket as descriptor
+number 0. The accessor of the portal will create a socket in the same
+\fIdomain\fP and \fIconnect\fP to the server. The user will then
+\fIwrap\fP the socket in the specified \fIprotocol\fP to create an object of
+the required descriptor type \fIdtype\fP and proceed with the
+operation which was in progress before the portal was encountered.
+.PP
+While the server process holds the socket (which it received as \fIfd\fP
+from the \fIportal\fP call on descriptor 0 at activation) further references
+will result in connections being made to the same socket.
+.NH 4
+File, device, and portal removal
+.PP
+A reference to a file, special device or portal may be removed with the
+\fIunlink\fP call,
+.DS
+unlink(path);
+char *path;
+.DE
+The caller must have write access to the directory in which
+the file is located for this call to be successful.
+.NH 3
+Reading and modifying file attributes
+.PP
+Detailed information about the attributes of a file
+may be obtained with the calls:
+.DS
+#include <sys/stat.h>
+
+stat(path, stb);
+char *path; result struct stat *stb;
+
+fstat(fd, stb);
+int fd; result struct stat *stb;
+.DE
+The \fIstat\fP structure includes the file
+type, protection, ownership, access times,
+size, and a count of hard links.
+If the file is a symbolic link, then the status of the link
+itself (rather than the file the link references)
+may be found using the \fIlstat\fP call:
+.DS
+lstat(path, stb);
+char *path; result struct stat *stb;
+.DE
+.PP
+A newly created file is assigned the user id of the
+process that created it and the group id of the directory
+in which it was created. The ownership of a file may
+be changed by either of the calls
+.DS
+chown(path, owner, group);
+char *path; int owner, group;
+
+fchown(fd, owner, group);
+int fd, owner, group;
+.DE
+.PP
+In addition to ownership, each file has three levels of access
+protection associated with it. These levels are owner relative,
+group relative, and global (all users and groups). Each level
+of access has separate indicators for read permission, write
+permission, and execute permission.
+The protection bits associated with a file may be set by either
+of the calls:
+.DS
+chmod(path, mode);
+char *path; int mode;
+
+fchmod(fd, mode);
+int fd, mode;
+.DE
+where \fImode\fP is a value indicating the new protection
+of the file, as listed in section 2.2.3.2.
+.PP
+Finally, the access and modify times on a file may be set by the call:
+.DS
+utimes(path, tvp)
+char *path; struct timeval *tvp[2];
+.DE
+This is particularly useful when moving files between media, to
+preserve relationships between the times the file was modified.
+.NH 3
+Links and renaming
+.PP
+Links allow multiple names for a file
+to exist. Links exist independently of the file linked to.
+.PP
+Two types of links exist, \fIhard\fP links and \fIsymbolic\fP
+links. A hard link is a reference counting mechanism that
+allows a file to have multiple names within the same file
+system. Symbolic links cause string substitution
+during the pathname interpretation process.
+.PP
+Hard links and symbolic links have different
+properties. A hard link ensures the target
+file will always be accessible, even after its original
+directory entry is removed; no such guarantee exists for a symbolic link.
+Symbolic links can span file system boundaries.
+.PP
+The following calls create a new link, named \fIpath2\fP,
+to \fIpath1\fP:
+.DS
+link(path1, path2);
+char *path1, *path2;
+
+symlink(path1, path2);
+char *path1, *path2;
+.DE
+The \fIunlink\fP primitive may be used to remove
+either type of link.
+.PP
+If a file is a symbolic link, the ``value'' of the
+link may be read with the \fIreadlink\fP call,
+.DS
+len = readlink(path, buf, bufsize);
+result int len; char *path; result char *buf; int bufsize;
+.DE
+This call returns, in \fIbuf\fP, the null-terminated string
+substituted into pathnames passing through \fIpath\fP\|.
+.PP
+Atomic renaming of file system resident objects is possible
+with the \fIrename\fP call:
+.DS
+rename(oldname, newname);
+char *oldname, *newname;
+.DE
+where both \fIoldname\fP and \fInewname\fP must be
+in the same file system.
+If \fInewname\fP exists and is a directory, then it must be empty.
+.NH 3
+Extension and truncation
+.PP
+Files are created with zero length and may be extended
+simply by writing or appending to them. While a file is
+open the system maintains a pointer into the file
+indicating the current location in the file associated with
+the descriptor. This pointer may be moved about in the
+file in a random access fashion.
+To set the current offset into a file, the \fIlseek\fP
+call may be used,
+.DS
+oldoffset = lseek(fd, offset, type);
+result off_t oldoffset; int fd; off_t offset; int type;
+.DE
+where \fItype\fP is given in \fI<sys/file.h>\fP as one of:
+.DS
+._d
+#define L_SET 0 /* set absolute file offset */
+#define L_INCR 1 /* set file offset relative to current position */
+#define L_XTND 2 /* set offset relative to end-of-file */
+.DE
+The call ``lseek(fd, 0, L_INCR)''
+returns the current offset into the file.
+.PP
+Files may have ``holes'' in them. Holes are void areas in the
+linear extent of the file where data has never been
+written. These may be created by seeking to
+a location in a file past the current end-of-file and writing.
+Holes are treated by the system as zero valued bytes.
+.PP
+A file may be truncated with either of the calls:
+.DS
+truncate(path, length);
+char *path; int length;
+
+ftruncate(fd, length);
+int fd, length;
+.DE
+reducing the size of the specified file to \fIlength\fP bytes.
+.NH 3
+Checking accessibility
+.PP
+A process running with
+different real and effective user ids
+may interrogate the accessibility of a file to the
+real user by using
+the \fIaccess\fP call:
+.DS
+accessible = access(path, how);
+result int accessible; char *path; int how;
+.DE
+Here \fIhow\fP is constructed by or'ing the following bits, defined
+in \fI<sys/file.h>\fP:
+.DS
+._d
+#define F_OK 0 /* file exists */
+#define X_OK 1 /* file is executable */
+#define W_OK 2 /* file is writable */
+#define R_OK 4 /* file is readable */
+.DE
+The presence or absence of advisory locks does not affect the
+result of \fIaccess\fP\|.
+.NH 3
+Locking
+.PP
+The file system provides basic facilities that allow cooperating processes
+to synchronize their access to shared files. A process may
+place an advisory \fIread\fP or \fIwrite\fP lock on a file,
+so that other cooperating processes may avoid interfering
+with the process' access. This simple mechanism
+provides locking with file granularity. More granular
+locking can be built using the IPC facilities to provide a lock
+manager.
+The system does not force processes to obey the locks;
+they are of an advisory nature only.
+.PP
+Locking is performed after an \fIopen\fP call by applying the
+\fIflock\fP primitive,
+.DS
+flock(fd, how);
+int fd, how;
+.DE
+where the \fIhow\fP parameter is formed from bits defined in \fI<sys/file.h>\fP:
+.DS
+._d
+#define LOCK_SH 1 /* shared lock */
+#define LOCK_EX 2 /* exclusive lock */
+#define LOCK_NB 4 /* don't block when locking */
+#define LOCK_UN 8 /* unlock */
+.DE
+Successive lock calls may be used to increase or
+decrease the level of locking. If an object is currently
+locked by another process when a \fIflock\fP call is made,
+the caller will be blocked until the current lock owner
+releases the lock; this may be avoided by including LOCK_NB
+in the \fIhow\fP parameter.
+Specifying LOCK_UN removes all locks associated with the descriptor.
+Advisory locks held by a process are automatically deleted when
+the process terminates.
+.NH 3
+Disk quotas
+.PP
+As an optional facility, each file system may be requested to
+impose limits on a user's disk usage.
+Two quantities are limited: the total amount of disk space which
+a user may allocate in a file system and the total number of files
+a user may create in a file system. Quotas are expressed as
+\fIhard\fP limits and \fIsoft\fP limits. A hard limit is
+always imposed; if a user would exceed a hard limit, the operation
+which caused the resource request will fail. A soft limit results
+in the user receiving a warning message, but with allocation succeeding.
+Facilities are provided to turn soft limits into hard limits if a
+user has exceeded a soft limit for an unreasonable period of time.
+.PP
+To enable disk quotas on a file system the \fIsetquota\fP call
+is used:
+.DS
+setquota(special, file)
+char *special, *file;
+.DE
+where \fIspecial\fP refers to a structured device file where
+a mounted file system exists, and
+\fIfile\fP refers to a disk quota file (residing on the file
+system associated with \fIspecial\fP) from which user quotas
+should be obtained. The format of the disk quota file is
+implementation dependent.
+.PP
+To manipulate disk quotas the \fIquota\fP call is provided:
+.DS
+#include <sys/quota.h>
+
+quota(cmd, uid, arg, addr)
+int cmd, uid, arg; caddr_t addr;
+.DE
+The indicated \fIcmd\fP is applied to the user ID \fIuid\fP.
+The parameters \fIarg\fP and \fIaddr\fP are command specific.
+The file \fI<sys/quota.h>\fP contains definitions pertinent to the
+use of this call.
diff --git a/share/doc/psd/05.sysman/2.3.t b/share/doc/psd/05.sysman/2.3.t
new file mode 100644
index 0000000..509c7f3
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.3.t
@@ -0,0 +1,412 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.3.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Interprocess communications
+.NH 3
+Interprocess communication primitives
+.NH 4
+Communication domains
+.PP
+The system provides access to an extensible set of
+communication \fIdomains\fP. A communication domain
+is identified by a manifest constant defined in the
+file \fI<sys/socket.h>\fP.
+Important standard domains supported by the system are the ``unix''
+domain, AF_UNIX, for communication within the system, the ``Internet''
+domain for communication in the DARPA Internet, AF_INET,
+and the ``NS'' domain, AF_NS, for communication
+using the Xerox Network Systems protocols.
+Other domains can be added to the system.
+.NH 4
+Socket types and protocols
+.PP
+Within a domain, communication takes place between communication endpoints
+known as \fIsockets\fP. Each socket has the potential to exchange
+information with other sockets of an appropriate type within the domain.
+.PP
+Each socket has an associated
+abstract type, which describes the semantics of communication using that
+socket. Properties such as reliability, ordering, and prevention
+of duplication of messages are determined by the type.
+The basic set of socket types is defined in \fI<sys/socket.h>\fP:
+.DS
+/* Standard socket types */
+._d
+#define SOCK_DGRAM 1 /* datagram */
+#define SOCK_STREAM 2 /* virtual circuit */
+#define SOCK_RAW 3 /* raw socket */
+#define SOCK_RDM 4 /* reliably-delivered message */
+#define SOCK_SEQPACKET 5 /* sequenced packets */
+.DE
+The SOCK_DGRAM type models the semantics of datagrams in network communication:
+messages may be lost or duplicated and may arrive out-of-order.
+A datagram socket may send messages to and receive messages from multiple
+peers.
+The SOCK_RDM type models the semantics of reliable datagrams: messages
+arrive unduplicated and in-order; the sender is notified if
+messages are lost.
+The \fIsend\fP and \fIreceive\fP operations (described below)
+generate reliable/unreliable datagrams.
+The SOCK_STREAM type models connection-based virtual circuits: two-way
+byte streams with no record boundaries.
+Connection setup is required before data communication may begin.
+The SOCK_SEQPACKET type models a connection-based,
+full-duplex, reliable, sequenced packet exchange;
+the sender is notified if messages are lost, and messages are never
+duplicated or presented out-of-order.
+Users of the last two abstractions may use the facilities for
+out-of-band transmission to send out-of-band data.
+.PP
+SOCK_RAW is used for unprocessed access to internal network layers
+and interfaces; it has no specific semantics.
+.PP
+Other socket types can be defined.
+.PP
+Each socket may have a specific \fIprotocol\fP associated with it.
+This protocol is used within the domain to provide the semantics
+required by the socket type.
+Not all socket types are supported by each domain;
+support depends on the existence and the implementation
+of a suitable protocol within the domain.
+For example, within the ``Internet'' domain, the SOCK_DGRAM type may be
+implemented by the UDP user datagram protocol, and the SOCK_STREAM
+type may be implemented by the TCP transmission control protocol, while
+no standard protocols to provide SOCK_RDM or SOCK_SEQPACKET sockets exist.
+.NH 4
+Socket creation, naming and service establishment
+.PP
+Sockets may be \fIconnected\fP or \fIunconnected\fP. An unconnected
+socket descriptor is obtained by the \fIsocket\fP call:
+.DS
+s = socket(domain, type, protocol);
+result int s; int domain, type, protocol;
+.DE
+The socket domain and type are as described above,
+and are specified using the definitions from \fI<sys/socket.h>\fP.
+The protocol may be given as 0, meaning any suitable protocol.
+One of several possible protocols may be selected using identifiers
+obtained from a library routine, \fIgetprotobyname\fP.
+.PP
+An unconnected socket descriptor of a connection-oriented type
+may yield a connected socket descriptor
+in one of two ways: either by actively connecting to another socket,
+or by becoming associated with a name in the communications domain and
+\fIaccepting\fP a connection from another socket.
+Datagram sockets need not establish connections before use.
+.PP
+To accept connections or to receive datagrams,
+a socket must first have a binding
+to a name (or address) within the communications domain.
+Such a binding may be established by a \fIbind\fP call:
+.DS
+bind(s, name, namelen);
+int s; struct sockaddr *name; int namelen;
+.DE
+Datagram sockets may have default bindings established when first
+sending data if not explicitly bound earlier.
+In either case,
+a socket's bound name may be retrieved with a \fIgetsockname\fP call:
+.DS
+getsockname(s, name, namelen);
+int s; result struct sockaddr *name; result int *namelen;
+.DE
+while the peer's name can be retrieved with \fIgetpeername\fP:
+.DS
+getpeername(s, name, namelen);
+int s; result struct sockaddr *name; result int *namelen;
+.DE
+Domains may support sockets with several names.
+.NH 4
+Accepting connections
+.PP
+Once a binding is made to a connection-oriented socket,
+it is possible to \fIlisten\fP for connections:
+.DS
+listen(s, backlog);
+int s, backlog;
+.DE
+The \fIbacklog\fP specifies the maximum count of connections
+that can be simultaneously queued awaiting acceptance.
+.PP
+An \fIaccept\fP call:
+.DS
+t = accept(s, name, anamelen);
+result int t; int s; result struct sockaddr *name; result int *anamelen;
+.DE
+returns a descriptor for a new, connected, socket
+from the queue of pending connections on \fIs\fP.
+If no new connections are queued for acceptance,
+the call will wait for a connection unless non-blocking I/O has been enabled.
+.NH 4
+Making connections
+.PP
+An active connection to a named socket is made by the \fIconnect\fP call:
+.DS
+connect(s, name, namelen);
+int s; struct sockaddr *name; int namelen;
+.DE
+Although datagram sockets do not establish connections,
+the \fIconnect\fP call may be used with such sockets
+to create an \fIassociation\fP with the foreign address.
+The address is recorded for use in future \fIsend\fP calls,
+which then need not supply destination addresses.
+Datagrams will be received only from that peer,
+and asynchronous error reports may be received.
+.PP
+It is also possible to create connected pairs of sockets without
+using the domain's name space to rendezvous; this is done with the
+\fIsocketpair\fP call\(dg:
+.FS
+\(dg 4.3BSD supports \fIsocketpair\fP creation only in the ``unix''
+communication domain.
+.FE
+.DS
+socketpair(domain, type, protocol, sv);
+int domain, type, protocol; result int sv[2];
+.DE
+Here the returned \fIsv\fP descriptors correspond to those obtained with
+\fIaccept\fP and \fIconnect\fP.
+.PP
+The call
+.DS
+pipe(pv)
+result int pv[2];
+.DE
+creates a pair of SOCK_STREAM sockets in the UNIX domain,
+with pv[0] only readable and pv[1] only writable.
+.NH 4
+Sending and receiving data
+.PP
+Messages may be sent from a socket by:
+.DS
+cc = sendto(s, buf, len, flags, to, tolen);
+result int cc; int s; caddr_t buf; int len, flags; caddr_t to; int tolen;
+.DE
+if the socket is not connected or:
+.DS
+cc = send(s, buf, len, flags);
+result int cc; int s; caddr_t buf; int len, flags;
+.DE
+if the socket is connected.
+The corresponding receive primitives are:
+.DS
+msglen = recvfrom(s, buf, len, flags, from, fromlenaddr);
+result int msglen; int s; result caddr_t buf; int len, flags;
+result caddr_t from; result int *fromlenaddr;
+.DE
+and
+.DS
+msglen = recv(s, buf, len, flags);
+result int msglen; int s; result caddr_t buf; int len, flags;
+.DE
+.PP
+In the unconnected case,
+the parameters \fIto\fP and \fItolen\fP
+specify the destination or source of the message, while
+the \fIfrom\fP parameter stores the source of the message,
+and \fI*fromlenaddr\fP initially gives the size of the \fIfrom\fP
+buffer and is updated to reflect the true length of the \fIfrom\fP
+address.
+.PP
+All calls cause the message to be received in or sent from
+the message buffer of length \fIlen\fP bytes, starting at address \fIbuf\fP.
+The \fIflags\fP specify
+peeking at a message without reading it or sending or receiving
+high-priority out-of-band messages, as follows:
+.DS
+._d
+#define MSG_PEEK 0x1 /* peek at incoming message */
+#define MSG_OOB 0x2 /* process out-of-band data */
+.DE
+.NH 4
+Scatter/gather and exchanging access rights
+.PP
+It is possible to scatter and gather data and to exchange access rights
+with messages. When either of these operations is involved,
+the number of parameters to the call becomes large.
+Thus the system defines a message header structure, in \fI<sys/socket.h>\fP,
+which can be
+used to conveniently contain the parameters to the calls:
+.DS
+.if t .ta .5i 1.25i 2i 2.7i
+.if n ._f
+struct msghdr {
+ caddr_t msg_name; /* optional address */
+ int msg_namelen; /* size of address */
+ struct iov *msg_iov; /* scatter/gather array */
+ int msg_iovlen; /* # elements in msg_iov */
+ caddr_t msg_accrights; /* access rights sent/received */
+ int msg_accrightslen; /* size of msg_accrights */
+};
+.DE
+Here \fImsg_name\fP and \fImsg_namelen\fP specify the source or destination
+address if the socket is unconnected; \fImsg_name\fP may be given as
+a null pointer if no names are desired or required.
+The \fImsg_iov\fP and \fImsg_iovlen\fP describe the scatter/gather
+locations, as described in section 2.1.3.
+Access rights to be sent along with the message are specified
+in \fImsg_accrights\fP, which has length \fImsg_accrightslen\fP.
+In the ``unix'' domain these are an array of integer descriptors,
+taken from the sending process and duplicated in the receiver.
+.PP
+This structure is used in the operations \fIsendmsg\fP and \fIrecvmsg\fP:
+.DS
+sendmsg(s, msg, flags);
+int s; struct msghdr *msg; int flags;
+
+msglen = recvmsg(s, msg, flags);
+result int msglen; int s; result struct msghdr *msg; int flags;
+.DE
+.NH 4
+Using read and write with sockets
+.PP
+The normal UNIX \fIread\fP and \fIwrite\fP calls may be
+applied to connected sockets and translated into \fIsend\fP and \fIreceive\fP
+calls from or to a single area of memory and discarding any rights
+received. A process may operate on a virtual circuit socket, a terminal
+or a file with blocking or non-blocking input/output
+operations without distinguishing the descriptor type.
+.NH 4
+Shutting down halves of full-duplex connections
+.PP
+A process that has a full-duplex socket such as a virtual circuit
+and no longer wishes to read from or write to this socket can
+give the call:
+.DS
+shutdown(s, direction);
+int s, direction;
+.DE
+where \fIdirection\fP is 0 to not read further, 1 to not
+write further, or 2 to completely shut the connection down.
+If the underlying protocol supports unidirectional or bidirectional shutdown,
+this indication will be passed to the peer.
+For example, a shutdown for writing might produce an end-of-file
+condition at the remote end.
+.NH 4
+Socket and protocol options
+.PP
+Sockets, and their underlying communication protocols, may
+support \fIoptions\fP. These options may be used to manipulate
+implementation- or protocol-specific facilities.
+The \fIgetsockopt\fP
+and \fIsetsockopt\fP calls are used to control options:
+.DS
+getsockopt(s, level, optname, optval, optlen)
+int s, level, optname; result caddr_t optval; result int *optlen;
+
+setsockopt(s, level, optname, optval, optlen)
+int s, level, optname; caddr_t optval; int optlen;
+.DE
+The option \fIoptname\fP is interpreted at the indicated
+protocol \fIlevel\fP for socket \fIs\fP. If a value is specified
+with \fIoptval\fP and \fIoptlen\fP, it is interpreted by
+the software operating at the specified \fIlevel\fP. The \fIlevel\fP
+SOL_SOCKET is reserved to indicate options maintained
+by the socket facilities. Other \fIlevel\fP values indicate
+a particular protocol which is to act on the option request;
+these values are normally interpreted as a ``protocol number''.
+.NH 3
+UNIX domain
+.PP
+This section describes briefly the properties of the UNIX communications
+domain.
+.NH 4
+Types of sockets
+.PP
+In the UNIX domain,
+the SOCK_STREAM abstraction provides pipe-like
+facilities, while SOCK_DGRAM provides (usually)
+reliable message-style communications.
+.NH 4
+Naming
+.PP
+Socket names are strings and may appear in the UNIX file
+system name space through portals\(dg.
+.FS
+\(dg The 4.3BSD implementation of the UNIX domain embeds
+bound sockets in the UNIX file system name space;
+this may change in future releases.
+.FE
+.NH 4
+Access rights transmission
+.PP
+The ability to pass UNIX descriptors with messages in this domain
+allows migration of service within the system and allows
+user processes to be used in building system facilities.
+.NH 3
+INTERNET domain
+.PP
+This section describes briefly how the Internet domain is
+mapped to the model described in this section. More
+information will be found in the document describing the
+network implementation in 4.3BSD.
+.NH 4
+Socket types and protocols
+.PP
+SOCK_STREAM is supported by the Internet TCP protocol;
+SOCK_DGRAM by the UDP protocol.
+Each is layered atop the network-level Internet Protocol (IP).
+The Internet Control Message Protocol is implemented atop/beside IP
+and is accessible via a raw socket.
+The SOCK_SEQPACKET
+has no direct Internet family analogue; a protocol
+based on one from the XEROX NS family and layered on
+top of IP could be implemented to fill this gap.
+.NH 4
+Socket naming
+.PP
+Sockets in the Internet domain have names composed of the 32 bit
+Internet address, and a 16 bit port number.
+Options may be used to
+provide IP source routing or security options.
+The 32-bit address is composed of network and host parts;
+the network part is variable in size and is frequency encoded.
+The host part may optionally be interpreted as a subnet field
+plus the host on subnet; this is enabled by setting a network address
+mask at boot time.
+.NH 4
+Access rights transmission
+.PP
+No access rights transmission facilities are provided in the Internet domain.
+.NH 4
+Raw access
+.PP
+The Internet domain allows the super-user access to the raw facilities
+of IP.
+These interfaces are modeled as SOCK_RAW sockets.
+Each raw socket is associated with one IP protocol number,
+and receives all traffic received for that protocol.
+This allows administrative and debugging
+functions to occur,
+and enables user-level implementations of special-purpose protocols
+such as inter-gateway routing protocols.
diff --git a/share/doc/psd/05.sysman/2.4.t b/share/doc/psd/05.sysman/2.4.t
new file mode 100644
index 0000000..cd7dcb9
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.4.t
@@ -0,0 +1,174 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.4.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Terminals and Devices
+.NH 3
+Terminals
+.PP
+Terminals support \fIread\fP and \fIwrite\fP I/O operations,
+as well as a collection of terminal specific \fIioctl\fP operations,
+to control input character interpretation and editing,
+and output format and delays.
+.NH 4
+Terminal input
+.PP
+Terminals are handled according to the underlying communication
+characteristics such as baud rate and required delays,
+and a set of software parameters.
+.NH 5
+Input modes
+.PP
+A terminal is in one of three possible modes: \fIraw\fP, \fIcbreak\fP,
+or \fIcooked\fP.
+In raw mode all input is passed through to the
+reading process immediately and without interpretation.
+In cbreak mode, the handler interprets input only by looking
+for characters that cause interrupts or output flow control;
+all other characters are made available as in raw mode.
+In cooked mode, input
+is processed to provide standard line-oriented local editing functions,
+and input is presented on a line-by-line basis.
+.NH 5
+Interrupt characters
+.PP
+Interrupt characters are interpreted by the terminal handler only in
+cbreak and cooked modes, and
+cause a software interrupt to be sent to all processes in the process
+group associated with the terminal. Interrupt characters exist
+to send SIGINT
+and SIGQUIT signals,
+and to stop a process group
+with the SIGTSTP signal either immediately, or when
+all input up to the stop character has been read.
+.NH 5
+Line editing
+.PP
+When the terminal is in cooked mode, editing of an input line
+is performed. Editing facilities allow deletion of the previous
+character or word, or deletion of the current input line.
+In addition, a special character may be used to reprint the current
+input line after some number of editing operations have been applied.
+.PP
+Certain other characters are interpreted specially when a terminal is
+in cooked mode. The \fIend of line\fP character determines
+the end of an input record. The \fIend of file\fP character simulates
+an end of file occurrence on terminal input. Flow control is provided
+by \fIstop output\fP and \fIstart output\fP control characters. Output
+may be flushed with the \fIflush output\fP character; and a \fIliteral
+character\fP may be used to force literal input of the immediately
+following character in the input line.
+.PP
+Input characters may be echoed to the terminal as they are received.
+Non-graphic ASCII input characters may be echoed as a two-character
+printable representation, ``^character.''
+.NH 4
+Terminal output
+.PP
+On output, the terminal handler provides some simple formatting services.
+These include converting the carriage return character to the
+two character return-linefeed sequence,
+inserting delays after certain standard control characters,
+expanding tabs, and providing translations
+for upper-case only terminals.
+.NH 4
+Terminal control operations
+.PP
+When a terminal is first opened it is initialized to a standard
+state and configured with a set of standard control, editing,
+and interrupt characters. A process
+may alter this configuration with certain
+control operations, specifying parameters in a standard structure:\(dg
+.FS
+\(dg The control interface described here is an internal interface only
+in 4.3BSD. Future releases will probably use a modified interface
+based on currently-proposed standards.
+.FE
+.DS
+._f
+struct ttymode {
+ short tt_ispeed; /* input speed */
+ int tt_iflags; /* input flags */
+ short tt_ospeed; /* output speed */
+ int tt_oflags; /* output flags */
+};
+.DE
+and ``special characters'' are specified with the
+\fIttychars\fP structure,
+.DS
+._f
+struct ttychars {
+ char tc_erasec; /* erase char */
+ char tc_killc; /* erase line */
+ char tc_intrc; /* interrupt */
+ char tc_quitc; /* quit */
+ char tc_startc; /* start output */
+ char tc_stopc; /* stop output */
+ char tc_eofc; /* end-of-file */
+ char tc_brkc; /* input delimiter (like nl) */
+ char tc_suspc; /* stop process signal */
+ char tc_dsuspc; /* delayed stop process signal */
+ char tc_rprntc; /* reprint line */
+ char tc_flushc; /* flush output (toggles) */
+ char tc_werasc; /* word erase */
+ char tc_lnextc; /* literal next character */
+};
+.DE
+.NH 4
+Terminal hardware support
+.PP
+The terminal handler allows a user to access basic
+hardware related functions; e.g. line speed,
+modem control, parity, and stop bits. A special signal,
+SIGHUP, is automatically
+sent to processes in a terminal's process
+group when a carrier transition is detected. This is
+normally associated with a user hanging up on a modem
+controlled terminal line.
+.NH 3
+Structured devices
+.PP
+Structured devices are typified by disks and magnetic
+tapes, but may represent any random-access device.
+The system performs read-modify-write type buffering actions on block
+devices to allow them to be read and written in a totally random
+access fashion like ordinary files.
+File systems are normally created in block devices.
+.NH 3
+Unstructured devices
+.PP
+Unstructured devices are those devices which
+do not support block structure. Familiar unstructured devices
+are raw communications lines (with
+no terminal handler), raster plotters, magnetic tape and disks unfettered
+by buffering and permitting large block input/output and positioning
+and formatting commands.
diff --git a/share/doc/psd/05.sysman/2.5.t b/share/doc/psd/05.sysman/2.5.t
new file mode 100644
index 0000000..109eb6a
--- /dev/null
+++ b/share/doc/psd/05.sysman/2.5.t
@@ -0,0 +1,39 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.5.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh "Process and kernel descriptors
+.PP
+The status of the facilities in this section is still under discussion.
+The \fIptrace\fP facility of earlier UNIX systems
+remains in 4.3BSD.
+Planned enhancements would allow a descriptor-based process control facility.
diff --git a/share/doc/psd/05.sysman/Makefile b/share/doc/psd/05.sysman/Makefile
new file mode 100644
index 0000000..3e63a47
--- /dev/null
+++ b/share/doc/psd/05.sysman/Makefile
@@ -0,0 +1,11 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= psd/05.sysman
+SRCS= 0.t 1.0.t 1.1.t 1.2.t 1.3.t 1.4.t 1.5.t 1.6.t 1.7.t \
+ 2.0.t 2.1.t 2.2.t 2.3.t 2.4.t 2.5.t a.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/psd/05.sysman/a.t b/share/doc/psd/05.sysman/a.t
new file mode 100644
index 0000000..3acefb1
--- /dev/null
+++ b/share/doc/psd/05.sysman/a.t
@@ -0,0 +1,235 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)a.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH Summary of facilities
+.bp
+.SH
+\s+2I. Summary of facilities\s0
+.PP
+.de h
+.br
+.if n .ne 8
+\fB\\$1 \\$2\fP
+.br
+..
+.nr H1 0
+.NH
+Kernel primitives
+.LP
+.h 1.1 "Process naming and protection
+.in +5
+.TS
+lw(1.6i) aw(3i).
+sethostid set UNIX host id
+gethostid get UNIX host id
+sethostname set UNIX host name
+gethostname get UNIX host name
+getpid get process id
+fork create new process
+exit terminate a process
+execve execute a different process
+getuid get user id
+geteuid get effective user id
+setreuid set real and effective user id's
+getgid get accounting group id
+getegid get effective accounting group id
+getgroups get access group set
+setregid set real and effective group id's
+setgroups set access group set
+getpgrp get process group
+setpgrp set process group
+.TE
+.in -5
+.h 1.2 "Memory management
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<sys/mman.h> memory management definitions
+sbrk change data section size
+sstk\(dg change stack section size
+.FS
+\(dg Not supported in 4.3BSD.
+.FE
+getpagesize get memory page size
+mmap\(dg map pages of memory
+msync\(dg flush modified mapped pages to filesystem
+munmap\(dg unmap memory
+mprotect\(dg change protection of pages
+madvise\(dg give memory management advice
+mincore\(dg determine core residency of pages
+msleep\(dg sleep on a lock
+mwakeup\(dg wakeup process sleeping on a lock
+.TE
+.in -5
+.h 1.3 "Signals
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<signal.h> signal definitions
+sigvec set handler for signal
+kill send signal to process
+killpgrp send signal to process group
+sigblock block set of signals
+sigsetmask restore set of blocked signals
+sigpause wait for signals
+sigstack set software stack for signals
+.TE
+.in -5
+.h 1.4 "Timing and statistics
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<sys/time.h> time-related definitions
+gettimeofday get current time and timezone
+settimeofday set current time and timezone
+getitimer read an interval timer
+setitimer get and set an interval timer
+profil profile process
+.TE
+.in -5
+.h 1.5 "Descriptors
+.in +5
+.TS
+lw(1.6i) aw(3i).
+getdtablesize descriptor reference table size
+dup duplicate descriptor
+dup2 duplicate to specified index
+close close descriptor
+select multiplex input/output
+fcntl control descriptor options
+wrap\(dg wrap descriptor with protocol
+.FS
+\(dg Not supported in 4.3BSD.
+.FE
+.TE
+.in -5
+.h 1.6 "Resource controls
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<sys/resource.h> resource-related definitions
+getpriority get process priority
+setpriority set process priority
+getrusage get resource usage
+getrlimit get resource limitations
+setrlimit set resource limitations
+.TE
+.in -5
+.h 1.7 "System operation support
+.in +5
+.TS
+lw(1.6i) aw(3i).
+mount mount a device file system
+swapon add a swap device
+umount unmount a file system
+sync flush system caches
+reboot reboot a machine
+acct specify accounting file
+.TE
+.in -5
+.NH
+System facilities
+.LP
+.h 2.1 "Generic operations
+.in +5
+.TS
+lw(1.6i) aw(3i).
+read read data
+write write data
+<sys/uio.h> scatter-gather related definitions
+readv scattered data input
+writev gathered data output
+<sys/ioctl.h> standard control operations
+ioctl device control operation
+.TE
+.in -5
+.h 2.2 "File system
+.PP
+Operations marked with a * exist in two forms: as shown,
+operating on a file name, and operating on a file descriptor,
+when the name is preceded by an ``f''.
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<sys/file.h> file system definitions
+chdir change directory
+chroot change root directory
+mkdir make a directory
+rmdir remove a directory
+open open a new or existing file
+mknod make a special file
+portal\(dg make a portal entry
+unlink remove a link
+stat* return status for a file
+lstat return status of link
+chown* change owner
+chmod* change mode
+utimes change access/modify times
+link make a hard link
+symlink make a symbolic link
+readlink read contents of symbolic link
+rename change name of file
+lseek reposition within file
+truncate* truncate file
+access determine accessibility
+flock lock a file
+.TE
+.in -5
+.h 2.3 "Communications
+.in +5
+.TS
+lw(1.6i) aw(3i).
+<sys/socket.h> standard definitions
+socket create socket
+bind bind socket to name
+getsockname get socket name
+listen allow queuing of connections
+accept accept a connection
+connect connect to peer socket
+socketpair create pair of connected sockets
+sendto send data to named socket
+send send data to connected socket
+recvfrom receive data on unconnected socket
+recv receive data on connected socket
+sendmsg send gathered data and/or rights
+recvmsg receive scattered data and/or rights
+shutdown partially close full-duplex connection
+getsockopt get socket option
+setsockopt set socket option
+.TE
+.in -5
+.h 2.4 "Terminals, block and character devices
+.in +5
+.in -5
+.h 2.5 "Processes and kernel hooks
+.in -5
diff --git a/share/doc/psd/05.sysman/spell.ok b/share/doc/psd/05.sysman/spell.ok
new file mode 100644
index 0000000..b0cbd9c
--- /dev/null
+++ b/share/doc/psd/05.sysman/spell.ok
@@ -0,0 +1,332 @@
+AF
+ANON
+AUTOBOOT
+Behav
+CLR
+DEF
+DGRAM
+DONTNEED
+Datagram
+Datagrams
+EINPROGRESS
+EWOULDBLOCK
+EXCL
+FD
+FSIZE
+Fabry
+GETFL
+GETOWN
+HASSEMAPHORE
+HASSEMPHORE
+IGN
+INCR
+INET
+IP
+IPC
+ISSET
+ITIMER
+Karels
+Leffler
+MADV
+MAXHOSTNAMELEN
+MSG
+Manual''PS1:6
+McKusick
+Mclear
+Mset
+NB
+NDELAY
+NGROUPS
+NLIMITS
+NOEXTEND
+NS
+OOB
+PGRP
+PRIO
+PROT
+PS1:6
+RB
+RDM
+RDONLY
+RDWR
+RH
+RLIM
+RLIMIT
+RSS
+RUSAGE
+SEQPACKET
+SETFL
+SETOWN
+SIG
+SIGALRM
+SIGBUS
+SIGCHLD
+SIGCONT
+SIGEMT
+SIGFPE
+SIGHUP
+SIGILL
+SIGINT
+SIGIO
+SIGIOT
+SIGKILL
+SIGPROF
+SIGQUIT
+SIGSEGV
+SIGSTOP
+SIGTERM
+SIGTRAP
+SIGTSTP
+SIGTTIN
+SIGTTOU
+SIGURG
+SIGUSR1
+SIGUSR2
+SIGVTALRM
+SIGXCPU
+SIGXFSZ
+Sem
+Sv
+TCP
+TRUNC
+UDP
+VAX
+WILLNEED
+WRONLY
+XTND
+accessor
+accrights
+accrightslen
+addr
+anamelen
+arg
+argv
+arusage
+astatus
+behav
+blkdev
+brkc
+bu
+buf
+buflen
+bufsize
+caddr
+cbreak
+chroot
+cmd
+datagram
+datagrams
+dev
+dopt
+dprop
+ds
+dst
+dsttime
+dsuspc
+dtype
+dup2
+egid
+envp
+eofc
+erasec
+errno
+euid
+fchmod
+fchown
+fcntl
+fd
+fdset
+file.h
+filename
+filesystem
+flushc
+fromlenaddr
+fs
+fstat
+ftruncate
+getdtablesize
+getegid
+geteuid
+getgid
+getgroups
+gethostid
+gethostname
+getitimer
+getpagesize
+getpeername
+getpriority
+getprotobyname
+getrlimit
+getrusage
+getsockname
+getsockopt
+gettimeofday
+gid
+gidset
+gidsetsize
+hostid
+idrss
+iflags
+inblock
+incr
+intrc
+ioctl.h
+iov
+iovec
+iovlen
+ispeed
+isrss
+itimerval
+ixrss
+kbytes
+killc
+killpgrp
+len
+linefeed
+lnextc
+lstat
+maddr
+madvise
+majflt
+maxrss
+mclear
+mincore
+minflt
+minuteswest
+mman.h
+mmap
+mprotect
+mremap
+mset
+msg
+msghdr
+msglen
+msgrcv
+msgsnd
+msleep
+msync
+munmap
+mwakeup
+namelen
+nbytes
+nd
+nds
+newname
+ngroups
+nivcsw
+nl
+nsignals
+nswap
+nvcsw
+oflag
+oflags
+oldmask
+oldname
+oldoffset
+onstack
+optlen
+optname
+optval
+or'ed
+or'ing
+ospeed
+oss
+osv
+oublock
+ovalue
+pagesize
+param
+param.h
+path1
+path2
+pathname
+pathnames
+pgrp
+pid
+pos
+prio
+prot
+proto
+pv
+quitc
+quota.h
+readlink
+readv
+reboot.h
+recv
+recvfrom
+recvmsg
+resource.h
+rgid
+rlim
+rlimit
+rlp
+ronly
+rprntc
+ru
+ruid
+rusage
+sbrk
+scp
+sem
+sendmsg
+sendto
+setgroups
+sethostid
+sethostname
+setitimer
+setpriority
+setquota
+setregid
+setreuid
+setrlimit
+setsockopt
+settimeofday
+sigblock
+sigcontext
+sigmask
+signal.h
+signo
+sigpause
+sigsetmask
+sigstack
+sigvec
+sockaddr
+socket.h
+socketpair
+socktype
+sp
+ss
+sstk
+startc
+stat.h
+stb
+stopc
+suspc
+sv
+sw
+symlink
+ta
+time.h
+timeval
+timezone
+tolen
+tt
+ttychars
+ttymode
+tv
+tvp
+tvsec
+types.h
+tz
+tzp
+uid
+uio.h
+umount
+usec
+vec
+wait.h
+waitstatus
+werasc
+writeable
+writev
diff --git a/share/doc/psd/20.ipctut/Makefile b/share/doc/psd/20.ipctut/Makefile
new file mode 100644
index 0000000..9fe861d
--- /dev/null
+++ b/share/doc/psd/20.ipctut/Makefile
@@ -0,0 +1,13 @@
+# @(#)Makefile 8.1 (Berkeley) 8/14/93
+
+DIR= psd/20.ipctut
+SRCS= tutor.me
+MACROS= -me
+EXTRA= dgramread.c dgramsend.c fig2.pic fig3.pic fig8.pic pipe.c \
+ socketpair.c strchkread.c streamread.c streamwrite.c \
+ udgramread.c udgramsend.c ustreamread.c ustreamwrite.c
+
+paper.ps: ${SRCS} ${EXTRA}
+ ${SOELIM} ${SRCS} | ${PIC} | ${TBL} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/psd/20.ipctut/dgramread.c b/share/doc/psd/20.ipctut/dgramread.c
new file mode 100644
index 0000000..cd0f147
--- /dev/null
+++ b/share/doc/psd/20.ipctut/dgramread.c
@@ -0,0 +1,83 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)dgramread.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdio.h>
+
+/*
+ * In the included file <netinet/in.h> a sockaddr_in is defined as follows:
+ * struct sockaddr_in {
+ * short sin_family;
+ * u_short sin_port;
+ * struct in_addr sin_addr;
+ * char sin_zero[8];
+ * };
+ *
+ * This program creates a datagram socket, binds a name to it, then reads
+ * from the socket.
+ */
+main()
+{
+ int sock, length;
+ struct sockaddr_in name;
+ char buf[1024];
+
+ /* Create an Internet-domain datagram socket from which to read. */
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("opening datagram socket");
+ exit(1);
+ }
+ /* Create name with wildcards (INADDR_ANY address, port 0). */
+ name.sin_family = AF_INET;
+ name.sin_addr.s_addr = INADDR_ANY;
+ name.sin_port = 0;
+ if (bind(sock, &name, sizeof(name))) {
+ perror("binding datagram socket");
+ exit(1);
+ }
+ /* Find assigned port value with getsockname() and print it out. */
+ length = sizeof(name);
+ if (getsockname(sock, &name, &length)) {
+ perror("getting socket name");
+ exit(1);
+ }
+ printf("Socket has port #%d\en", ntohs(name.sin_port));
+ /* Read from the socket; NOTE(review): buf is not NUL-terminated here. */
+ if (read(sock, buf, 1024) < 0)
+ perror("receiving datagram packet");
+ printf("-->%s\en", buf);
+ close(sock);
+}
diff --git a/share/doc/psd/20.ipctut/dgramsend.c b/share/doc/psd/20.ipctut/dgramsend.c
new file mode 100644
index 0000000..831fbf1
--- /dev/null
+++ b/share/doc/psd/20.ipctut/dgramsend.c
@@ -0,0 +1,80 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)dgramsend.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdio.h>
+
+#define DATA "The sea is calm tonight, the tide is full . . ."
+
+/*
+ * Here I send a datagram to a receiver whose name I get from the command
+ * line arguments. The form of the command line is dgramsend hostname
+ * portnumber
+ */
+
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ int sock;
+ struct sockaddr_in name;
+ struct hostent *hp, *gethostbyname();
+
+ /* Create an Internet-domain datagram socket on which to send. */
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("opening datagram socket");
+ exit(1);
+ }
+ /*
+ * Construct name, with no wildcards, of the socket to send to.
+ * Gethostbyname() returns a structure including the network address
+ * of the specified host. The port number is taken from the command
+ * line.
+ */
+ hp = gethostbyname(argv[1]);
+ if (hp == 0) {
+ fprintf(stderr, "%s: unknown host\en", argv[1]);
+ exit(2);
+ }
+ bcopy(hp->h_addr, &name.sin_addr, hp->h_length);
+ name.sin_family = AF_INET;
+ name.sin_port = htons(atoi(argv[2]));
+ /* Send message; sizeof(DATA) counts the string's terminating NUL. */
+ if (sendto(sock, DATA, sizeof(DATA), 0, &name, sizeof(name)) < 0)
+ perror("sending datagram message");
+ close(sock);
+}
diff --git a/share/doc/psd/20.ipctut/fig2.pic b/share/doc/psd/20.ipctut/fig2.pic
new file mode 100644
index 0000000..ffbc193
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig2.pic
@@ -0,0 +1,77 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" @(#)fig2.pic 8.1 (Berkeley) 8/14/93
+.PS
+.ps
+.ps 10
+arc at 5.407,4.723 from 5.375,4.838 to 5.362,4.612 cw
+arc at 5.907,7.973 from 5.875,8.088 to 5.862,7.862 cw
+line from 5.963,5.513 to 6.925,5.513
+line from 5.963,5.650 to 6.925,5.650
+line from 5.963,5.787 to 6.925,5.787
+line from 5.963,5.912 to 6.925,5.912
+line from 5.963,6.050 to 6.925,6.050
+line from 5.963,6.200 to 6.925,6.200 to 6.925,5.375 to 5.963,5.375 to 5.963,6.200
+ellipse at 6.388,6.713 wid 0.475 ht 0.475
+line from 6.388,6.463 to 6.388,6.200
+line from 3.150,6.200 to 4.112,6.200 to 4.112,5.375 to 3.150,5.375 to 3.150,6.200
+line from 3.150,6.050 to 4.112,6.050
+line from 3.150,5.912 to 4.112,5.912
+line from 3.150,5.787 to 4.112,5.787
+line from 3.150,5.650 to 4.112,5.650
+line from 3.150,5.513 to 4.112,5.513
+ellipse at 3.575,6.713 wid 0.475 ht 0.475
+line from 3.575,6.463 to 3.575,6.200
+line from 3.650,8.762 to 4.612,8.762
+line from 3.650,8.900 to 4.612,8.900
+line from 3.650,9.037 to 4.612,9.037
+line from 3.650,9.162 to 4.612,9.162
+line from 3.650,9.300 to 4.612,9.300
+line from 3.650,9.450 to 4.612,9.450 to 4.612,8.625 to 3.650,8.625 to 3.650,9.450
+ellipse at 4.075,9.963 wid 0.475 ht 0.475
+ellipse at 3.950,4.725 wid 0.225 ht 0.225
+ellipse at 4.450,7.975 wid 0.225 ht 0.225
+dashwid = 0.037i
+line dotted from 1.925,7.513 to 8.238,7.513
+line from 6.050,6.138 to 5.737,6.138 to 5.737,4.700 to 5.550,4.700
+line from 5.650,4.725 to 5.550,4.700 to 5.650,4.675
+line from 6.050,6.013 to 4.050,4.888
+line from 4.125,4.958 to 4.050,4.888 to 4.149,4.915
+line from 3.975,6.000 to 4.525,5.987 to 3.925,4.875
+line from 3.950,4.975 to 3.925,4.875 to 3.994,4.951
+line from 3.975,6.112 to 5.650,6.112 to 5.650,4.750 to 5.550,4.763
+line from 5.652,4.775 to 5.550,4.763 to 5.646,4.725
+line from 4.075,9.713 to 4.075,9.450
+line from 4.475,9.363 to 6.150,9.363 to 6.150,8.000 to 6.050,8.012
+line from 6.152,8.025 to 6.050,8.012 to 6.146,7.975
+line from 4.475,9.250 to 5.025,9.238 to 4.425,8.125
+line from 4.450,8.225 to 4.425,8.125 to 4.494,8.201
+.ps
+.ps 20
+line from 4.362,4.775 to 4.162,4.725 to 4.362,4.675
+line from 4.162,4.725 to 4.838,4.725
+.ps
+.ps 10
+line from 3.962,4.600 to 5.375,4.600
+line from 3.950,4.838 to 5.375,4.838
+line from 4.450,8.088 to 5.875,8.088
+line from 4.463,7.850 to 5.875,7.850
+.ps
+.ps 20
+line from 4.862,8.025 to 4.662,7.975 to 4.862,7.925
+line from 4.662,7.975 to 5.338,7.975
+.ps
+.ps 11
+.ft
+.ft R
+"Child" at 6.362,7.106
+.ps
+.ps 12
+"Parent" at 3.362,7.096 ljust
+"Parent" at 3.862,10.346 ljust
+"PIPE" at 4.987,4.671 ljust
+"PIPE" at 5.425,7.921 ljust
+.ps
+.ft
+.PE
diff --git a/share/doc/psd/20.ipctut/fig2.xfig b/share/doc/psd/20.ipctut/fig2.xfig
new file mode 100644
index 0000000..59b46be
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig2.xfig
@@ -0,0 +1,100 @@
+#FIG 2.0
+80 2
+5 1 0 1 0 0 0 0 0.000 0 0 0 432.554 462.170 430 453 442 461 429 471
+5 1 0 1 0 0 0 0 0.000 0 0 0 472.554 202.170 470 193 482 201 469 211
+6 414 279 589 424
+6 473 340 557 414
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 399 554 399 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 388 554 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 377 554 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 367 554 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 356 554 356 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 477 344 554 344 554 410 477 410 477 344 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 511 303 19 19 511 303 525 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 511 323 511 344 9999 9999
+-6
+6 189 279 364 424
+6 248 340 332 414
+2 2 0 1 0 0 0 0 0.000 0 0
+ 252 344 329 344 329 410 252 410 252 344 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 356 329 356 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 367 329 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 377 329 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 388 329 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 399 329 399 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 286 303 19 19 286 303 300 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 286 323 286 344 9999 9999
+-6
+6 288 80 372 154
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 139 369 139 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 128 369 128 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 117 369 117 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 107 369 107 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 96 369 96 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 292 84 369 84 369 150 292 150 292 84 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 326 43 19 19 326 43 340 57
+1 3 0 1 0 0 0 0 0.000 1 0.000 316 462 9 9 316 462 322 469
+1 3 0 1 0 0 0 0 0.000 1 0.000 356 202 9 9 356 202 362 209
+2 1 2 1 0 0 0 0 3.000 0 0
+ 154 239 659 239 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 349 459 349 459 464 444 464 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 359 324 449 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 318 360 362 361 314 450 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 318 351 452 351 452 460 444 459 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 326 63 326 84 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 358 91 492 91 492 200 484 199 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 358 100 402 101 354 190 9999 9999
+2 1 0 2 0 0 0 0 0.000 0 1
+ 0 0 2.000 8.000 16.000
+ 333 462 387 462 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 317 472 430 472 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 316 453 430 453 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 356 193 470 193 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 357 212 470 212 9999 9999
+2 1 0 2 0 0 0 0 0.000 0 1
+ 0 0 2.000 8.000 16.000
+ 373 202 427 202 9999 9999
+4 1 0 11 0 0 0 0.000 1 7 24 509 274 Child
+4 0 0 12 0 0 0 0.000 1 9 33 269 275 Parent
+4 0 0 12 0 0 0 0.000 1 9 33 309 15 Parent
+4 0 0 12 0 0 0 0.000 1 9 26 399 469 PIPE
+4 0 0 12 0 0 0 0.000 1 9 26 434 209 PIPE
diff --git a/share/doc/psd/20.ipctut/fig3.pic b/share/doc/psd/20.ipctut/fig3.pic
new file mode 100644
index 0000000..15a4a73
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig3.pic
@@ -0,0 +1,69 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" @(#)fig3.pic 8.1 (Berkeley) 8/14/93
+.PS
+.ps
+.ps 10
+ellipse at 5.787,8.012 wid 0.275 ht 0.275
+ellipse at 4.175,8.012 wid 0.275 ht 0.275
+dashwid = 0.037i
+line dotted from 5.550,8.012 to 4.362,8.012
+line from 4.462,8.037 to 4.362,8.012 to 4.462,7.987
+line dotted from 4.362,7.950 to 5.550,7.950
+line from 5.450,7.925 to 5.550,7.950 to 5.450,7.975
+ellipse at 3.737,4.763 wid 0.275 ht 0.275
+ellipse at 5.350,4.763 wid 0.275 ht 0.275
+line dotted from 3.925,4.700 to 5.112,4.700
+line from 5.013,4.675 to 5.112,4.700 to 5.013,4.725
+line dotted from 5.112,4.763 to 3.925,4.763
+line from 4.025,4.788 to 3.925,4.763 to 4.025,4.737
+line from 5.963,5.513 to 6.925,5.513
+line from 5.963,5.650 to 6.925,5.650
+line from 5.963,5.787 to 6.925,5.787
+line from 5.963,5.912 to 6.925,5.912
+line from 5.963,6.050 to 6.925,6.050
+line from 5.963,6.200 to 6.925,6.200 to 6.925,5.375 to 5.963,5.375 to 5.963,6.200
+ellipse at 6.388,6.713 wid 0.475 ht 0.475
+line from 6.388,6.463 to 6.388,6.200
+line from 3.150,6.200 to 4.112,6.200 to 4.112,5.375 to 3.150,5.375 to 3.150,6.200
+line from 3.150,6.050 to 4.112,6.050
+line from 3.150,5.912 to 4.112,5.912
+line from 3.150,5.787 to 4.112,5.787
+line from 3.150,5.650 to 4.112,5.650
+line from 3.150,5.513 to 4.112,5.513
+ellipse at 3.575,6.713 wid 0.475 ht 0.475
+line from 3.575,6.463 to 3.575,6.200
+line from 3.650,8.762 to 4.612,8.762
+line from 3.650,8.900 to 4.612,8.900
+line from 3.650,9.037 to 4.612,9.037
+line from 3.650,9.162 to 4.612,9.162
+line from 3.650,9.300 to 4.612,9.300
+line from 3.650,9.450 to 4.612,9.450 to 4.612,8.625 to 3.650,8.625 to 3.650,9.450
+ellipse at 4.075,9.963 wid 0.475 ht 0.475
+line from 3.975,6.112 to 5.650,6.112 to 5.650,4.750 to 5.550,4.763
+line from 5.652,4.775 to 5.550,4.763 to 5.646,4.725
+line from 6.050,6.138 to 5.737,6.138 to 5.737,4.700 to 5.550,4.700
+line from 5.650,4.725 to 5.550,4.700 to 5.650,4.675
+line dotted from 1.925,7.513 to 8.238,7.513
+line from 6.050,6.013 to 4.050,4.888
+line from 4.125,4.958 to 4.050,4.888 to 4.149,4.915
+line from 3.975,6.000 to 4.525,5.987 to 3.925,4.875
+line from 3.950,4.975 to 3.925,4.875 to 3.994,4.951
+line from 4.075,9.713 to 4.075,9.450
+line from 4.475,9.363 to 6.150,9.363 to 6.150,8.000 to 6.050,8.012
+line from 6.152,8.025 to 6.050,8.012 to 6.146,7.975
+line from 4.475,9.250 to 5.025,9.238 to 4.425,8.125
+line from 4.450,8.225 to 4.425,8.125 to 4.494,8.201
+.ps
+.ps 11
+.ft
+.ft R
+"Child" at 6.362,7.106
+.ps
+.ps 12
+"Parent" at 3.362,7.096 ljust
+"Parent" at 3.862,10.346 ljust
+.ps
+.ft
+.PE
diff --git a/share/doc/psd/20.ipctut/fig3.xfig b/share/doc/psd/20.ipctut/fig3.xfig
new file mode 100644
index 0000000..ed65b70
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig3.xfig
@@ -0,0 +1,100 @@
+#FIG 2.0
+80 2
+6 309 184 479 214
+1 3 0 1 0 0 0 0 0.000 1 0.000 463 199 11 11 463 199 468 209
+1 3 0 1 0 0 0 0 0.000 1 0.000 334 199 11 11 334 199 339 209
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 444 199 349 199 9999 9999
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 349 204 444 204 9999 9999
+-6
+6 274 444 444 474
+1 3 0 1 0 0 0 0 0.000 1 0.000 299 459 11 11 299 459 304 469
+1 3 0 1 0 0 0 0 0.000 1 0.000 428 459 11 11 428 459 433 469
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 314 464 409 464 9999 9999
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 409 459 314 459 9999 9999
+-6
+6 414 279 589 424
+6 473 340 557 414
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 399 554 399 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 388 554 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 377 554 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 367 554 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 356 554 356 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 477 344 554 344 554 410 477 410 477 344 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 511 303 19 19 511 303 525 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 511 323 511 344 9999 9999
+-6
+6 189 279 364 424
+6 248 340 332 414
+2 2 0 1 0 0 0 0 0.000 0 0
+ 252 344 329 344 329 410 252 410 252 344 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 356 329 356 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 367 329 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 377 329 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 388 329 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 399 329 399 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 286 303 19 19 286 303 300 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 286 323 286 344 9999 9999
+-6
+6 288 80 372 154
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 139 369 139 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 128 369 128 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 117 369 117 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 107 369 107 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 292 96 369 96 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 292 84 369 84 369 150 292 150 292 84 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 326 43 19 19 326 43 340 57
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 318 351 452 351 452 460 444 459 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 349 459 349 459 464 444 464 9999 9999
+2 1 2 1 0 0 0 0 3.000 0 0
+ 154 239 659 239 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 359 324 449 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 318 360 362 361 314 450 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 326 63 326 84 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 358 91 492 91 492 200 484 199 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 358 100 402 101 354 190 9999 9999
+4 1 0 11 0 0 0 0.000 1 7 24 509 274 Child
+4 0 0 12 0 0 0 0.000 1 9 33 269 275 Parent
+4 0 0 12 0 0 0 0.000 1 9 33 309 15 Parent
diff --git a/share/doc/psd/20.ipctut/fig8.pic b/share/doc/psd/20.ipctut/fig8.pic
new file mode 100644
index 0000000..92b8833
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig8.pic
@@ -0,0 +1,79 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" @(#)fig8.pic 8.1 (Berkeley) 8/14/93
+.PS
+.ps
+.ps 11
+.ft
+.ft R
+"Process 1" at 3.800,7.106 rjust
+"Process 2" at 6.612,7.106 rjust
+.ps
+.ps 10
+line from 3.150,6.200 to 4.112,6.200 to 4.112,5.375 to 3.150,5.375 to 3.150,6.200
+line from 3.150,6.050 to 4.112,6.050
+line from 3.150,5.912 to 4.112,5.912
+line from 3.150,5.787 to 4.112,5.787
+line from 3.150,5.650 to 4.112,5.650
+line from 3.150,5.513 to 4.112,5.513
+ellipse at 3.575,6.713 wid 0.475 ht 0.475
+line from 3.575,6.463 to 3.575,6.200
+line from 5.963,5.513 to 6.925,5.513
+line from 5.963,5.650 to 6.925,5.650
+line from 5.963,5.787 to 6.925,5.787
+line from 5.963,5.912 to 6.925,5.912
+line from 5.963,6.050 to 6.925,6.050
+line from 5.963,6.200 to 6.925,6.200 to 6.925,5.375 to 5.963,5.375 to 5.963,6.200
+ellipse at 6.388,6.713 wid 0.475 ht 0.475
+line from 6.388,6.463 to 6.388,6.200
+line from 3.087,8.637 to 4.050,8.637
+line from 3.087,8.775 to 4.050,8.775
+line from 3.087,8.912 to 4.050,8.912
+line from 3.087,9.037 to 4.050,9.037
+line from 3.087,9.175 to 4.050,9.175
+line from 3.087,9.325 to 4.050,9.325 to 4.050,8.500 to 3.087,8.500 to 3.087,9.325
+ellipse at 3.513,9.838 wid 0.475 ht 0.475
+line from 3.513,9.588 to 3.513,9.325
+line from 5.900,9.325 to 6.862,9.325 to 6.862,8.500 to 5.900,8.500 to 5.900,9.325
+line from 5.900,9.175 to 6.862,9.175
+line from 5.900,9.037 to 6.862,9.037
+line from 5.900,8.912 to 6.862,8.912
+line from 5.900,8.775 to 6.862,8.775
+line from 5.900,8.637 to 6.862,8.637
+ellipse at 6.325,9.838 wid 0.475 ht 0.475
+line from 6.325,9.588 to 6.325,9.325
+.ps
+.ps 11
+"Process 2" at 6.550,10.231 rjust
+"Process 1" at 3.737,10.231 rjust
+.ps
+.ps 10
+ellipse at 6.112,4.888 wid 0.275 ht 0.275
+ellipse at 5.350,4.763 wid 0.275 ht 0.275
+ellipse at 3.737,4.763 wid 0.275 ht 0.275
+ellipse at 4.550,7.950 wid 0.275 ht 0.275
+ellipse at 5.487,7.950 wid 0.275 ht 0.275
+line from 6.050,6.013 to 5.175,6.013 to 5.987,5.013
+line from 5.905,5.074 to 5.987,5.013 to 5.944,5.106
+line from 6.050,6.138 to 5.737,6.138 to 5.737,4.700 to 5.550,4.700
+line from 5.650,4.725 to 5.550,4.700 to 5.650,4.675
+dashwid = 0.037i
+line dotted from 1.925,7.513 to 8.238,7.513
+line from 3.975,6.000 to 4.525,5.987 to 3.925,4.875
+line from 3.950,4.975 to 3.925,4.875 to 3.994,4.951
+line dotted from 5.112,4.763 to 3.925,4.763
+line from 4.025,4.788 to 3.925,4.763 to 4.025,4.737
+line dotted from 3.925,4.700 to 5.112,4.700
+line from 5.013,4.675 to 5.112,4.700 to 5.013,4.725
+line from 6.050,9.012 to 5.487,9.012 to 5.487,8.137
+line from 5.462,8.237 to 5.487,8.137 to 5.513,8.237
+line from 3.737,9.137 to 4.550,9.137 to 4.550,8.137
+line from 4.525,8.237 to 4.550,8.137 to 4.575,8.237
+.ps
+.ps 11
+"NAME" at 6.737,4.918 rjust
+"NAME" at 6.112,8.043 rjust
+.ps
+.ft
+.PE
diff --git a/share/doc/psd/20.ipctut/fig8.xfig b/share/doc/psd/20.ipctut/fig8.xfig
new file mode 100644
index 0000000..f1a5257
--- /dev/null
+++ b/share/doc/psd/20.ipctut/fig8.xfig
@@ -0,0 +1,116 @@
+#FIG 2.0
+80 2
+6 224 254 589 279
+4 2 0 11 0 0 0 0.000 1 7 38 304 274 Process 1
+4 2 0 11 0 0 0 0.000 1 7 38 529 274 Process 2
+-6
+6 189 279 364 424
+6 248 340 332 414
+2 2 0 1 0 0 0 0 0.000 0 0
+ 252 344 329 344 329 410 252 410 252 344 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 356 329 356 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 367 329 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 377 329 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 388 329 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 252 399 329 399 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 286 303 19 19 286 303 300 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 286 323 286 344 9999 9999
+-6
+6 414 279 589 424
+6 473 340 557 414
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 399 554 399 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 388 554 388 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 377 554 377 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 367 554 367 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 477 356 554 356 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 477 344 554 344 554 410 477 410 477 344 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 511 303 19 19 511 303 525 317
+2 1 0 1 0 0 0 0 0.000 0 0
+ 511 323 511 344 9999 9999
+-6
+6 184 29 359 174
+6 243 90 327 164
+2 1 0 1 0 0 0 0 0.000 0 0
+ 247 149 324 149 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 247 138 324 138 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 247 127 324 127 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 247 117 324 117 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 247 106 324 106 9999 9999
+2 2 0 1 0 0 0 0 0.000 0 0
+ 247 94 324 94 324 160 247 160 247 94 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 281 53 19 19 281 53 295 67
+2 1 0 1 0 0 0 0 0.000 0 0
+ 281 73 281 94 9999 9999
+-6
+6 409 29 584 174
+6 468 90 552 164
+2 2 0 1 0 0 0 0 0.000 0 0
+ 472 94 549 94 549 160 472 160 472 94 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 472 106 549 106 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 472 117 549 117 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 472 127 549 127 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 472 138 549 138 9999 9999
+2 1 0 1 0 0 0 0 0.000 0 0
+ 472 149 549 149 9999 9999
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 506 53 19 19 506 53 520 67
+2 1 0 1 0 0 0 0 0.000 0 0
+ 506 73 506 94 9999 9999
+-6
+6 219 4 584 29
+4 2 0 11 0 0 0 0.000 1 7 38 524 24 Process 2
+4 2 0 11 0 0 0 0.000 1 7 38 299 24 Process 1
+-6
+1 3 0 1 0 0 0 0 0.000 1 0.000 489 449 11 11 489 449 494 459
+1 3 0 1 0 0 0 0 0.000 1 0.000 428 459 11 11 428 459 433 469
+1 3 0 1 0 0 0 0 0.000 1 0.000 299 459 11 11 299 459 304 469
+1 3 0 1 0 0 0 0 0.000 1 0.000 364 204 11 11 364 204 369 214
+1 3 0 1 0 0 0 0 0.000 1 0.000 439 204 11 11 439 204 444 214
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 359 414 359 479 439 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 349 459 349 459 464 444 464 9999 9999
+2 1 2 1 0 0 0 0 3.000 0 0
+ 154 239 659 239 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 318 360 362 361 314 450 9999 9999
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 409 459 314 459 9999 9999
+2 1 2 1 0 0 0 0 3.000 1 0
+ 0 0 1.000 4.000 8.000
+ 314 464 409 464 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 484 119 439 119 439 189 9999 9999
+2 1 0 1 0 0 0 0 0.000 1 0
+ 0 0 1.000 4.000 8.000
+ 299 109 364 109 364 189 9999 9999
+4 2 0 11 0 0 0 0.000 1 7 32 539 449 NAME
+4 2 0 11 0 0 0 0.000 1 7 32 489 199 NAME
diff --git a/share/doc/psd/20.ipctut/pipe.c b/share/doc/psd/20.ipctut/pipe.c
new file mode 100644
index 0000000..3f482f3
--- /dev/null
+++ b/share/doc/psd/20.ipctut/pipe.c
@@ -0,0 +1,74 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pipe.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <stdio.h>
+
+#define DATA "Bright star, would I were steadfast as thou art . . ."
+
+/*
+ * This program creates a pipe, then forks. The child communicates to the
+ * parent over the pipe. Notice that a pipe is a one-way communications
+ * device. I can write to the output socket (sockets[1], the second socket
+ * of the array returned by pipe()) and read from the input socket
+ * (sockets[0]), but not vice versa.
+ */
+
+main()
+{
+	int sockets[2], child;
+
+	/* Create a pipe */
+	if (pipe(sockets) < 0) {
+		perror("creating pipe");
+		exit(10);
+	}
+
+	if ((child = fork()) == -1)
+		perror("fork");
+	else if (child) {
+		char buf[1024];
+		/* This is still the parent. It reads the child's message. */
+		close(sockets[1]);
+		if (read(sockets[0], buf, 1024) < 0)
+			perror("reading message");
+		else	/* buf holds the message only if read() succeeded */
+			printf("-->%s\en", buf);
+		close(sockets[0]);
+	} else {
+		/* This is the child. It writes a message to its parent. */
+		close(sockets[0]);
+		if (write(sockets[1], DATA, sizeof(DATA)) < 0)
+			perror("writing message");
+		close(sockets[1]);
+	}
+}
diff --git a/share/doc/psd/20.ipctut/socketpair.c b/share/doc/psd/20.ipctut/socketpair.c
new file mode 100644
index 0000000..afc5c90
--- /dev/null
+++ b/share/doc/psd/20.ipctut/socketpair.c
@@ -0,0 +1,77 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)socketpair.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdio.h>
+
+#define DATA1 "In Xanadu, did Kublai Khan . . ."
+#define DATA2 "A stately pleasure dome decree . . ."
+
+/*
+ * This program creates a pair of connected sockets, then forks and
+ * communicates over them. This is very similar to communication with pipes;
+ * however, socketpairs are two-way communications objects. Therefore I can
+ * send messages in both directions.
+ */
+
+main()
+{
+	int sockets[2], child;
+	char buf[1024];
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) < 0) {
+		perror("opening stream socket pair");
+		exit(1);
+	}
+
+	if ((child = fork()) == -1)
+		perror("fork");
+	else if (child) {	/* This is the parent. */
+		close(sockets[0]);	/* the parent uses sockets[1] only */
+		if (read(sockets[1], buf, 1024) < 0)
+			perror("reading stream message");
+		printf("-->%s\en", buf);
+		if (write(sockets[1], DATA2, sizeof(DATA2)) < 0)
+			perror("writing stream message");
+		close(sockets[1]);
+	} else {		/* This is the child. */
+		close(sockets[1]);	/* the child uses sockets[0] only */
+		if (write(sockets[0], DATA1, sizeof(DATA1)) < 0)
+			perror("writing stream message");
+		if (read(sockets[0], buf, 1024) < 0)
+			perror("reading stream message");
+		printf("-->%s\en", buf);
+		close(sockets[0]);
+	}
+}
diff --git a/share/doc/psd/20.ipctut/strchkread.c b/share/doc/psd/20.ipctut/strchkread.c
new file mode 100644
index 0000000..dcab1fc
--- /dev/null
+++ b/share/doc/psd/20.ipctut/strchkread.c
@@ -0,0 +1,106 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)strchkread.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdio.h>
+#define TRUE 1
+
+/*
+ * This program uses select() to check that someone is trying to connect
+ * before calling accept().
+ */
+
+main()
+{
+	int sock, length, msgsock, rval;
+	struct sockaddr_in server;
+	char buf[1024];
+	fd_set ready;
+	struct timeval to;
+
+	/* Create socket */
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0) {
+		perror("opening stream socket");
+		exit(1);
+	}
+	/* Name socket using wildcards */
+	server.sin_family = AF_INET;
+	server.sin_addr.s_addr = INADDR_ANY;
+	server.sin_port = 0;
+	if (bind(sock, &server, sizeof(server))) {
+		perror("binding stream socket");
+		exit(1);
+	}
+	/* Find out assigned port number and print it out */
+	length = sizeof(server);
+	if (getsockname(sock, &server, &length)) {
+		perror("getting socket name");
+		exit(1);
+	}
+	printf("Socket has port #%d\en", ntohs(server.sin_port));
+
+	/* Start accepting connections */
+	listen(sock, 5);
+	do {
+		/* Rebuild the descriptor set and timeout for every select() */
+		FD_ZERO(&ready);
+		FD_SET(sock, &ready);
+		to.tv_sec = 5;
+		to.tv_usec = 0;
+		if (select(sock + 1, &ready, 0, 0, &to) < 0) {
+			perror("select");
+			continue;
+		}
+		if (FD_ISSET(sock, &ready)) {
+			msgsock = accept(sock, (struct sockaddr *)0, (int *)0);
+			if (msgsock == -1)
+				perror("accept");
+			else do {
+				bzero(buf, sizeof(buf));	/* keeps buf terminated */
+				if ((rval = read(msgsock, buf, sizeof(buf) - 1)) < 0)
+					perror("reading stream message");
+				else if (rval == 0)
+					printf("Ending connection\en");
+				else
+					printf("-->%s\en", buf);
+			} while (rval > 0);
+			close(msgsock);
+		} else
+			printf("Do something else\en");
+	} while (TRUE);
+}
diff --git a/share/doc/psd/20.ipctut/streamread.c b/share/doc/psd/20.ipctut/streamread.c
new file mode 100644
index 0000000..139a269
--- /dev/null
+++ b/share/doc/psd/20.ipctut/streamread.c
@@ -0,0 +1,102 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)streamread.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdio.h>
+#define TRUE 1
+
+/*
+ * This program creates a socket and then begins an infinite loop. Each time
+ * through the loop it accepts a connection and prints out messages from it.
+ * When the connection breaks, or a termination message comes through, the
+ * program accepts a new connection.
+ */
+
+main()
+{
+	int sock, length;
+	struct sockaddr_in server;
+	int msgsock;
+	char buf[1024];
+	int rval;
+
+	/* Create socket */
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0) {
+		perror("opening stream socket");
+		exit(1);
+	}
+	/* Name socket using wildcards */
+	server.sin_family = AF_INET;
+	server.sin_addr.s_addr = INADDR_ANY;
+	server.sin_port = 0;
+	if (bind(sock, &server, sizeof(server))) {
+		perror("binding stream socket");
+		exit(1);
+	}
+	/* Find out assigned port number and print it out */
+	length = sizeof(server);
+	if (getsockname(sock, &server, &length)) {
+		perror("getting socket name");
+		exit(1);
+	}
+	printf("Socket has port #%d\en", ntohs(server.sin_port));
+
+	/* Start accepting connections */
+	listen(sock, 5);
+	do {
+		msgsock = accept(sock, 0, 0);
+		if (msgsock == -1)
+			perror("accept");
+		else do {
+			/* Clear buf and leave room for a NUL so %s is safe */
+			bzero(buf, sizeof(buf));
+			if ((rval = read(msgsock, buf, sizeof(buf) - 1)) < 0)
+				perror("reading stream message");
+			else if (rval == 0)
+				printf("Ending connection\en");
+			else
+				printf("-->%s\en", buf);
+			/* End of file or a read error ends this connection */
+		} while (rval > 0);
+		close(msgsock);
+	} while (TRUE);
+	/*
+	 * Since this program has an infinite loop, the socket "sock" is
+	 * never explicitly closed. However, all sockets will be closed
+	 * automatically when a process is killed or terminates normally.
+	 */
+}
diff --git a/share/doc/psd/20.ipctut/streamwrite.c b/share/doc/psd/20.ipctut/streamwrite.c
new file mode 100644
index 0000000..db4daaf
--- /dev/null
+++ b/share/doc/psd/20.ipctut/streamwrite.c
@@ -0,0 +1,81 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)streamwrite.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdio.h>
+
+#define DATA "Half a league, half a league . . ."
+
+/*
+ * This program creates a socket and initiates a connection with the socket
+ * given in the command line. One message is sent over the connection and
+ * then the socket is closed, ending the connection. The form of the command
+ * line is streamwrite hostname portnumber
+ */
+
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	int sock;
+	struct sockaddr_in server;
+	struct hostent *hp, *gethostbyname();
+
+	/* Create socket */
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0) {
+		perror("opening stream socket");
+		exit(1);
+	}
+	/* Connect socket using name specified by command line. */
+	server.sin_family = AF_INET;
+	hp = gethostbyname(argv[1]);
+	if (hp == 0) {
+		fprintf(stderr, "%s: unknown host\en", argv[1]);
+		exit(2);
+	}
+	/* Copy the resolved host address into the socket name. */
+	bcopy(hp->h_addr, &server.sin_addr, hp->h_length);
+	server.sin_port = htons(atoi(argv[2]));
+
+	if (connect(sock, &server, sizeof(server)) < 0) {
+		perror("connecting stream socket");
+		exit(1);
+	}
+	if (write(sock, DATA, sizeof(DATA)) < 0)
+		perror("writing on stream socket");
+	close(sock);
+}
diff --git a/share/doc/psd/20.ipctut/tutor.me b/share/doc/psd/20.ipctut/tutor.me
new file mode 100644
index 0000000..fba4583
--- /dev/null
+++ b/share/doc/psd/20.ipctut/tutor.me
@@ -0,0 +1,939 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)tutor.me 8.1 (Berkeley) 8/14/93
+.\"
+.oh 'Introductory 4.4BSD IPC''PSD:20-%'
+.eh 'PSD:20-%''Introductory 4.4BSD IPC'
+.rs
+.sp 2
+.sz 14
+.ft B
+.ce 2
+An Introductory 4.4BSD
+Interprocess Communication Tutorial
+.sz 10
+.sp 2
+.ce
+.i "Stuart Sechrest"
+.ft
+.sp
+.ce 4
+Computer Science Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+.sp 2
+.ce
+.i ABSTRACT
+.sp
+.(c
+.pp
+Berkeley UNIX\(dg 4.4BSD offers several choices for interprocess communication.
+To aid the programmer in developing programs which are comprised of
+cooperating
+processes, the different choices are discussed and a series of example
+programs are presented. These programs
+demonstrate in a simple way the use of pipes, socketpairs, sockets
+and the use of datagram and stream communication. The intent of this
+document is to present a few simple example programs, not to describe the
+networking system in full.
+.)c
+.sp 2
+.(f
+\(dg\|UNIX is a trademark of AT&T Bell Laboratories.
+.)f
+.b
+.sh 1 "Goals"
+.r
+.pp
+Facilities for interprocess communication (IPC) and networking
+were a major addition to UNIX in the Berkeley UNIX 4.2BSD release.
+These facilities required major additions and some changes
+to the system interface.
+The basic idea of this interface is to make IPC similar to file I/O.
+In UNIX a process has a set of I/O descriptors, from which one reads
+and to which one writes.
+Descriptors may refer to normal files, to devices (including terminals),
+or to communication channels.
+The use of a descriptor has three phases: its creation,
+its use for reading and writing, and its destruction. By using descriptors
+to write files, rather than simply naming the target file in the write
+call, one gains a surprising amount of flexibility. Often, the program that
+creates a descriptor will be different from the program that uses the
+descriptor. For example the shell can create a descriptor for the output
+of the `ls'
+command that will cause the listing to appear in a file rather than
+on a terminal.
+Pipes are another form of descriptor that have been used in UNIX
+for some time.
+Pipes allow one-way data transmission from one process
+to another; the two processes and the pipe must be set up by a common
+ancestor.
+.pp
+The use of descriptors is not the only communication interface
+provided by UNIX.
+The signal mechanism sends a tiny amount of information from one
+process to another.
+The signaled process receives only the signal type,
+not the identity of the sender,
+and the number of possible signals is small.
+The signal semantics limit the flexibility of the signaling mechanism
+as a means of interprocess communication.
+.pp
+The identification of IPC with I/O is quite longstanding in UNIX and
+has proved quite successful. At first, however, IPC was limited to
+processes communicating within a single machine. With Berkeley UNIX
+4.2BSD this expanded to include IPC between machines. This expansion
+has necessitated some change in the way that descriptors are created.
+Additionally, new possibilities for the meaning of read and write have
+been admitted. Originally the meanings, or semantics, of these terms
+were fairly simple. When you wrote something it was delivered. When
+you read something, you were blocked until the data arrived.
+Other possibilities exist,
+however. One can write without full assurance of delivery if one can
+check later to catch occasional failures. Messages can be kept as
+discrete units or merged into a stream.
+One can ask to read, but insist on not waiting if nothing is immediately
+available. These new possibilities are allowed in the Berkeley UNIX IPC
+interface.
+.pp
+Thus Berkeley UNIX 4.4BSD offers several choices for IPC.
+This paper presents simple examples that illustrate some of
+the choices.
+The reader is presumed to be familiar with the C programming language
+[Kernighan & Ritchie 1978],
+but not necessarily with the system calls of the UNIX system or with
+processes and interprocess communication.
+The paper reviews the notion of a process and the types of
+communication that are supported by Berkeley UNIX 4.4BSD.
+A series of examples are presented that create processes that communicate
+with one another. The programs show different ways of establishing
+channels of communication.
+Finally, the calls that actually transfer data are reviewed.
+To clearly present how communication can take place,
+the example programs have been cleared of anything that
+might be construed as useful work.
+They can, therefore, serve as models
+for the programmer trying to construct programs which are comprised of
+cooperating processes.
+.b
+.sh 1 "Processes"
+.pp
+A \fIprogram\fP is both a sequence of statements and a rough way of referring
+to the computation that occurs when the compiled statements are run.
+A \fIprocess\fP can be thought of as a single line of control in a program.
+Most programs execute some statements, go through a few loops, branch in
+various directions and then end. These are single process programs.
+Programs can also have a point where control splits into two independent lines,
+an action called \fIforking.\fP
+In UNIX these lines can never join again. A call to the system routine
+\fIfork()\fP causes a process to split in this way.
+The result of this call is that two independent processes will be
+running, executing exactly the same code.
+Memory values will be the same for all values set before the fork, but,
+subsequently, each version will be able to change only the
+value of its own copy of each variable.
+Initially, the only difference between the two will be the value returned by
+\fIfork().\fP The parent will receive a process id for the child;
+the child will receive a zero.
+Calls to \fIfork(),\fP
+therefore, typically precede, or are included in, an if-statement.
+.pp
+A process views the rest of the system through a private table of descriptors.
+The descriptors can represent open files or sockets (sockets are communication
+objects that will be discussed below). Descriptors are referred to
+by their index numbers in the table. The first three descriptors are often
+known by special names, \fIstdin, stdout\fP and \fIstderr\fP.
+These are the standard input, output and error.
+When a process forks, its descriptor table is copied to the child.
+Thus, if the parent's standard input is being taken from a terminal
+(devices are also treated as files in UNIX), the child's input will
+be taken from the
+same terminal. Whoever reads first will get the input. If, before forking,
+the parent changes its standard input so that it is reading from a
+new file, the child will take its input from the new file. It is
+also possible to take input from a socket, rather than from a file.
+.b
+.sh 1 "Pipes"
+.r
+.pp
+Most users of UNIX know that they can pipe the output of a
+program ``prog1'' to the input of another, ``prog2,'' by typing the command
+\fI``prog1 | prog2.''\fP
+This is called ``piping'' the output of one program
+to another because the mechanism used to transfer the output is called a
+pipe.
+When the user types a command, the command is read by the shell, which
+decides how to execute it. If the command is simple, for example,
+.i "``prog1,''"
+the shell forks a process, which executes the program, prog1, and then dies.
+The shell waits for this termination and then prompts for the next
+command.
+If the command is a compound command,
+.i "``prog1 | prog2,''"
+the shell creates two processes connected by a pipe. One process
+runs the program, prog1, the other runs prog2. The pipe is an I/O
+mechanism with two ends, or sockets. Data that is written into one socket
+can be read from the other.
+.(z
+.ft CW
+.so pipe.c
+.ft
+.ce 1
+Figure 1\ \ Use of a pipe
+.)z
+.pp
+Since a program specifies its input and output only by the descriptor table
+indices, which appear as variables or constants,
+the input source and output destination can be changed without
+changing the text of the program.
+It is in this way that the shell is able to set up pipes. Before executing
+prog1, the process can close whatever is at \fIstdout\fP
+and replace it with one
+end of a pipe. Similarly, the process that will execute prog2 can substitute
+the opposite end of the pipe for
+\fIstdin.\fP
+.pp
+Let us now examine a program that creates a pipe for communication between
+its child and itself (Figure 1).
+A pipe is created by a parent process, which then forks.
+When a process forks, the parent's descriptor table is copied into
+the child's.
+.pp
+In Figure 1, the parent process makes a call to the system routine
+\fIpipe().\fP
+This routine creates a pipe and places descriptors for the sockets
+for the two ends of the pipe in the process's descriptor table.
+\fIPipe()\fP
+is passed an array into which it places the index numbers of the
+sockets it created.
+The two ends are not equivalent. The socket whose index is
+returned in the first element of the array is opened for reading only,
+while the socket whose index is returned in the second element
+is opened only for writing.
+This corresponds to the fact that the standard input is the first
+descriptor of a process's descriptor table and the standard output
+is the second. After creating the pipe, the parent creates the child
+with which it will share the pipe by calling \fIfork().\fP
+Figure 2 illustrates the effect of a fork.
+The parent process's descriptor table points to both ends of the pipe.
+After the fork, both parent's and child's descriptor tables point to
+the pipe.
+The child can then use the pipe to send a message to the parent.
+.(z
+.so fig2.pic
+.ce 2
+Figure 2\ \ Sharing a pipe between parent and child
+.ce 0
+.)z
+.pp
+Just what is a pipe?
+It is a one-way communication mechanism, with one end opened
+for reading and the other end for writing.
+Therefore, parent and child need to agree on which way to turn
+the pipe, from parent to child or the other way around.
+Using the same pipe for communication both from parent to child and
+from child to parent would be possible (since both processes have
+references to both ends), but very complicated.
+If the parent and child are to have a two-way conversation,
+the parent creates two pipes, one for use in each direction.
+(In accordance with their plans, both parent and child in the example above
+close the socket that they will not use. It is not required that unused
+descriptors be closed, but it is good practice.)
+A pipe is also a \fIstream\fP communication mechanism; that
+is, all messages sent through the pipe are placed in order
+and reliably delivered. When the reader asks for a certain
+number of bytes from this
+stream, he is given as many bytes as are available, up
+to the amount of the request. Note that these bytes may have come from
+the same call to \fIwrite()\fR or from several calls to \fIwrite()\fR
+which were concatenated.
+.b
+.sh 1 "Socketpairs"
+.r
+.pp
+Berkeley UNIX 4.4BSD provides a slight generalization of pipes. A pipe is a
+pair of connected sockets for one-way stream communication. One may
+obtain a pair of connected sockets for two-way stream communication
+by calling the routine \fIsocketpair().\fP
+The program in Figure 3 calls \fIsocketpair()\fP
+to create such a connection. The program uses the link for
+communication in both directions. Since socketpairs are
+an extension of pipes, their use resembles that of pipes.
+Figure 4 illustrates the result of a fork following a call to
+\fIsocketpair().\fP
+.pp
+\fISocketpair()\fP
+takes as
+arguments a specification of a domain, a style of communication, and a
+protocol.
+These are the parameters shown in the example.
+Domains and protocols will be discussed in the next section.
+Briefly,
+a domain is a space of names that may be bound
+to sockets and implies certain other conventions.
+Currently, socketpairs have only been implemented for one
+domain, called the UNIX domain.
+The UNIX domain uses UNIX path names for naming sockets.
+It only allows communication
+between sockets on the same machine.
+.pp
+Note that the header files
+.i "<sys/socket.h>"
+and
+.i "<sys/types.h>"
+are required in this program.
+The constants AF_UNIX and SOCK_STREAM are defined in
+.i "<sys/socket.h>,"
+which in turn requires the file
+.i "<sys/types.h>"
+for some of its definitions.
+.(z
+.ft CW
+.so socketpair.c
+.ft
+.ce 1
+Figure 3\ \ Use of a socketpair
+.)z
+.(z
+.so fig3.pic
+.ce 1
+Figure 4\ \ Sharing a socketpair between parent and child
+.)z
+.b
+.sh 1 "Domains and Protocols"
+.r
+.pp
+Pipes and socketpairs are a simple solution for communicating between
+a parent and child or between child processes.
+What if we wanted to have processes that have no common ancestor
+with whom to set up communication?
+Neither standard UNIX pipes nor socketpairs are
+the answer here, since both mechanisms require a common ancestor to
+set up the communication.
+We would like to have two processes separately create sockets
+and then have messages sent between them. This is often the
+case when providing or using a service in the system. This is
+also the case when the communicating processes are on separate machines.
+In Berkeley UNIX 4.4BSD one can create individual sockets, give them names and
+send messages between them.
+.pp
+Sockets created by different programs use names to refer to one another;
+names generally must be translated into addresses for use.
+The space from which an address is drawn is referred to as a
+.i domain.
+There are several domains for sockets.
+Two that will be used in the examples here are the UNIX domain (or AF_UNIX,
+for Address Family UNIX) and the Internet domain (or AF_INET).
+UNIX domain IPC is an experimental facility in 4.2BSD and 4.3BSD.
+In the UNIX domain, a socket is given a path name within the file system
+name space.
+A file system node is created for the socket and other processes may
+then refer to the socket by giving the proper pathname.
+UNIX domain names, therefore, allow communication between any two processes
+that work in the same file system.
+The Internet domain is the UNIX implementation of the DARPA Internet
+standard protocols IP/TCP/UDP.
+Addresses in the Internet domain consist of a machine network address
+and an identifying number, called a port.
+Internet domain names allow communication between machines.
+.pp
+Communication follows some particular ``style.''
+Currently, communication is either through a \fIstream\fP
+or by \fIdatagram.\fP
+Stream communication implies several things. Communication takes
+place across a connection between two sockets. The communication
+is reliable, error-free, and, as in pipes, no message boundaries are
+kept. Reading from a stream may result in reading the data sent from
+one or several calls to \fIwrite()\fP
+or only part of the data from a single call, if there is not enough room
+for the entire message, or if not all the data from a large message
+has been transferred.
+The protocol implementing such a style will retransmit messages
+received with errors. It will also return error messages if one tries to
+send a message after the connection has been broken.
+Datagram communication does not use connections. Each message is
+addressed individually. If the address is correct, it will generally
+be received, although this is not guaranteed. Often datagrams are
+used for requests that require a response from the
+recipient. If no response
+arrives in a reasonable amount of time, the request is repeated.
+The individual datagrams will be kept separate when they are read, that
+is, message boundaries are preserved.
+.pp
+The difference in performance between the two styles of communication is
+generally less important than the difference in semantics. The
+performance gain that one might find in using datagrams must be weighed
+against the increased complexity of the program, which must now concern
+itself with lost or out of order messages. If lost messages may simply be
+ignored, the quantity of traffic may be a consideration. The expense
+of setting up a connection is best justified by frequent use of the connection.
+Since the performance of a protocol changes as it is tuned for different
+situations, it is best to seek the most up-to-date information when
+making choices for a program in which performance is crucial.
+.pp
+A protocol is a set of rules, data formats and conventions that regulate the
+transfer of data between participants in the communication.
+In general, there is one protocol for each socket type (stream,
+datagram, etc.) within each domain.
+The code that implements a protocol
+keeps track of the names that are bound to sockets,
+sets up connections and transfers data between sockets,
+perhaps sending the data across a network.
+It is possible for several protocols, differing only in low level
+details, to implement the same style of communication within
+a particular domain. Although it is possible to select
+which protocol should be used, for nearly all uses it is sufficient to
+request the default protocol. This has been done in all of the example
+programs.
+.pp
+One specifies the domain, style and protocol of a socket when
+it is created. For example, in Figure 5a the call to \fIsocket()\fP
+causes the creation of a datagram socket with the default protocol
+in the UNIX domain.
+.b
+.sh 1 "Datagrams in the UNIX Domain"
+.r
+.(z
+.ft CW
+.so udgramread.c
+.ft
+.ce 1
+Figure 5a\ \ Reading UNIX domain datagrams
+.)z
+.pp
+Let us now look at two programs that create sockets separately.
+The programs in Figures 5a and 5b use datagram communication
+rather than a stream.
+The structure used to name UNIX domain sockets is defined
+in the file \fI<sys/un.h>.\fP
+The definition has also been included in the example for clarity.
+.pp
+Each program creates a socket with a call to \fIsocket().\fP
+These sockets are in the UNIX domain.
+Once a name has been decided upon it is attached to a socket by the
+system call \fIbind().\fP
+The program in Figure 5a uses the name ``socket'',
+which it binds to its socket.
+This name will appear in the working directory of the program.
+The program in Figure 5b uses its
+socket only for sending messages. It does not create a name for
+the socket because no other process has to refer to it.
+.(z
+.ft CW
+.so udgramsend.c
+.ft
+.ce 1
+Figure 5b\ \ Sending a UNIX domain datagram
+.)z
+.pp
+Names in the UNIX domain are path names. Like file path names they may
+be either absolute (e.g. ``/dev/imaginary'') or relative (e.g. ``socket'').
+Because these names are used to allow processes to rendezvous, relative
+path names can pose difficulties and should be used with care.
+When a name is bound into the name space, a file (inode) is allocated in the
+file system. If
+the inode is not deallocated, the name will continue to exist even after
+the bound socket is closed. This can cause subsequent runs of a program
+to find that a name is unavailable, and can cause
+directories to fill up with these
+objects. The names are removed by calling \fIunlink()\fP or using
+the \fIrm\fP\|(1) command.
+Names in the UNIX domain are only used for rendezvous. They are not used
+for message delivery once a connection is established. Therefore, in
+contrast with the Internet domain, unbound sockets need not be (and are
+not) automatically given addresses when they are connected.
+.pp
+There is no established means of communicating names to interested
+parties. In the example, the program in Figure 5b gets the
+name of the socket to which it will send its message through its
+command line arguments. Once a line of communication has been created,
+one can send the names of additional, perhaps new, sockets over the link.
+Facilities will have to be built that will make the distribution of
+names less of a problem than it now is.
+.b
+.sh 1 "Datagrams in the Internet Domain"
+.r
+.(z
+.ft CW
+.so dgramread.c
+.ft
+.ce 1
+Figure 6a\ \ Reading Internet domain datagrams
+.)z
+.pp
+The examples in Figures 6a and 6b are very close to the previous example
+except that the socket is in the Internet domain.
+The structure of Internet domain addresses is defined in the file
+\fI<netinet/in.h>\fP.
+Internet addresses specify a host address (a 32-bit number)
+and a delivery slot, or port, on that
+machine. These ports are managed by the system routines that implement
+a particular protocol.
+Unlike UNIX domain names, Internet socket names are not entered into
+the file system and, therefore,
+they do not have to be unlinked after the socket has been closed.
+When a message must be sent between machines it is sent to
+the protocol routine on the destination machine, which interprets the
+address to determine to which socket the message should be delivered.
+Several different protocols may be active on
+the same machine, but, in general, they will not communicate with one another.
+As a result, different protocols are allowed to use the same port numbers.
+Thus, implicitly, an Internet address is a triple including a protocol as
+well as the port and machine address.
+An \fIassociation\fP is a temporary or permanent specification
+of a pair of communicating sockets.
+An association is thus identified by the tuple
+<\fIprotocol, local machine address, local port,
+remote machine address, remote port\fP>.
+An association may be transient when using datagram sockets;
+the association actually exists during a \fIsend\fP operation.
+.(z
+.ft CW
+.so dgramsend.c
+.ft
+.ce 1
+Figure 6b\ \ Sending an Internet domain datagram
+.)z
+.pp
+The protocol for a socket is chosen when the socket is created. The
+local machine address for a socket can be any valid network address of the
+machine, if it has more than one, or it can be the wildcard value
+INADDR_ANY.
+The wildcard value is used in the program in Figure 6a.
+If a machine has several network addresses, it is likely
+that messages sent to any of the addresses should be deliverable to
+a socket. This will be the case if the wildcard value has been chosen.
+Note that even if the wildcard value is chosen, a program sending messages
+to the named socket must specify a valid network address. One can be willing
+to receive from ``anywhere,'' but one cannot send a message ``anywhere.''
+The program in Figure 6b is given the destination host name as a command
+line argument.
+To determine a network address to which it can send the message, it looks
+up
+the host address by the call to \fIgethostbyname()\fP.
+The returned structure includes the host's network address,
+which is copied into the structure specifying the
+destination of the message.
+.pp
+The port number can be thought of as the number of a mailbox, into
+which the protocol places one's messages. Certain daemons, offering
+certain advertised services, have reserved
+or ``well-known'' port numbers. These fall in the range
+from 1 to 1023. Higher numbers are available to general users.
+Only servers need to ask for a particular number.
+The system will assign an unused port number when an address
+is bound to a socket.
+This may happen when an explicit \fIbind\fP
+call is made with a port number of 0, or
+when a \fIconnect\fP or \fIsend\fP
+is performed on an unbound socket.
+Note that port numbers are not automatically reported back to the user.
+After calling \fIbind(),\fP asking for port 0, one may call
+\fIgetsockname()\fP to discover what port was actually assigned.
+The routine \fIgetsockname()\fP
+will not work for names in the UNIX domain.
+.pp
+The format of the socket address is specified in part by standards within the
+Internet domain. The specification includes the order of the bytes in
+the address. Because machines differ in the internal representation
+they ordinarily use
+to represent integers, printing out the port number as returned by
+\fIgetsockname()\fP may result in a misinterpretation. To
+print out the number, it is necessary to use the routine \fIntohs()\fP
+(for \fInetwork to host: short\fP) to convert the number from the
+network representation to the host's representation. On some machines,
+such as 68000-based machines, this is a null operation. On others,
+such as VAXes, this results in a swapping of bytes. Another routine
+exists to convert a short integer from the host format to the network format,
+called \fIhtons()\fP; similar routines exist for long integers.
+For further information, refer to the
+entry for \fIbyteorder\fP in section 3 of the manual.
+.b
+.sh 1 "Connections"
+.r
+.pp
+To send data between stream sockets (having communication style SOCK_STREAM),
+the sockets must be connected.
+Figures 7a and 7b show two programs that create such a connection.
+The program in Figure 7a is relatively simple.
+To initiate a connection, this program simply creates
+a stream socket, then calls \fIconnect()\fP,
+specifying the address of the socket to which
+it wishes its socket connected. Provided that the target socket exists and
+is prepared to handle a connection, connection will be complete,
+and the program can begin to send
+messages. Messages will be delivered in order without message
+boundaries, as with pipes. The connection is destroyed when either
+socket is closed (or soon thereafter). If a process persists
+in sending messages after the connection is closed, a SIGPIPE signal
+is sent to the process by the operating system. Unless explicit action
+is taken to handle the signal (see the manual page for \fIsignal\fP
+or \fIsigvec\fP),
+the process will terminate and the shell
+will print the message ``broken pipe.''
+.(z
+.ft CW
+.so streamwrite.c
+.ft
+.ce 1
+Figure 7a\ \ Initiating an Internet domain stream connection
+.)z
+.(z
+.ft CW
+.so streamread.c
+.ft
+.ce 1
+Figure 7b\ \ Accepting an Internet domain stream connection
+.sp 2
+.ft CW
+.so strchkread.c
+.ft
+.ce 1
+Figure 7c\ \ Using select() to check for pending connections
+.)z
+.(z
+.so fig8.pic
+.sp
+.ce 1
+Figure 8\ \ Establishing a stream connection
+.)z
+.pp
+Forming a connection is asymmetrical; one process, such as the
+program in Figure 7a, requests a connection with a particular socket;
+the other process accepts connection requests.
+Before a connection can be accepted a socket must be created and an address
+bound to it. This
+situation is illustrated in the top half of Figure 8. Process 2
+has created a socket and bound a port number to it. Process 1 has created an
+unnamed socket.
+The address bound to process 2's socket is then made known to process 1 and,
+perhaps, to several other potential communicants as well.
+If there are several possible communicants,
+this one socket might receive several requests for connections.
+As a result, a new socket is created for each connection. This new socket
+is the endpoint for communication within this process for this connection.
+A connection may be destroyed by closing the corresponding socket.
+.pp
+The program in Figure 7b is a rather trivial example of a server. It
+creates a socket to which it binds a name, which it then advertises.
+(In this case it prints out the port number.) The program then calls
+\fIlisten()\fP for this socket.
+Since several clients may attempt to connect more or less
+simultaneously, a queue of pending connections is maintained in the system
+address space. \fIListen()\fP
+marks the socket as willing to accept connections and initializes the queue.
+When a connection is requested, it is listed in the queue. If the
+queue is full, an error status may be returned to the requester.
+The maximum length of this queue is specified by the second argument of
+\fIlisten()\fP; the maximum length is limited by the system.
+Once the listen call has been completed, the program enters
+an infinite loop. On each pass through the loop, a new connection is
+accepted and removed from the queue, and, hence, a new socket for the
+connection is created. The bottom half of Figure 8 shows the result of
+Process 1 connecting with the named socket of Process 2, and Process 2
+accepting the connection. After the connection is created, the
+service, in this case printing out the messages, is performed and the
+connection socket closed. The \fIaccept()\fP
+call will take a pending connection
+request from the queue if one is available, or block waiting for a request.
+Messages are read from the connection socket.
+Reads from an active connection will normally block until data is available.
+The number of bytes read is returned. When a connection is destroyed,
+the read call returns immediately. The number of bytes returned will
+be zero.
+.pp
+The program in Figure 7c is a slight variation on the server in Figure 7b.
+It avoids blocking when there are no pending connection requests by
+calling \fIselect()\fP
+to check for pending requests before calling \fIaccept().\fP
+This strategy is useful when connections may be received
+on more than one socket, or when data may arrive on other connected
+sockets before another connection request.
+.pp
+The programs in Figures 9a and 9b use stream communication
+in the UNIX domain. Streams in the UNIX domain can be used for this sort
+of program in exactly the same way as Internet domain streams, except for
+the form of the names and the restriction of the connections to a single
+file system. There are some differences, however, in the functionality of
+streams in the two domains, notably in the handling of
+\fIout-of-band\fP data (discussed briefly below). These differences
+are beyond the scope of this paper.
+.(z
+.ft CW
+.so ustreamwrite.c
+.ft
+.ce 1
+Figure 9a\ \ Initiating a UNIX domain stream connection
+.sp 2
+.ft CW
+.so ustreamread.c
+.ft
+.ce 1
+Figure 9b\ \ Accepting a UNIX domain stream connection
+.)z
+.b
+.sh 1 "Reads, Writes, Recvs, etc."
+.r
+.pp
+UNIX 4.4BSD has several system calls for reading and writing information.
+The simplest calls are \fIread()\fP and \fIwrite().\fP \fIWrite()\fP
+takes as arguments the index of a descriptor, a pointer to a buffer
+containing the data and the size of the data.
+The descriptor may indicate either a file or a connected socket.
+``Connected'' can mean either a connected stream socket (as described
+in Section 8) or a datagram socket for which a \fIconnect()\fP
+call has provided a default destination (see the \fIconnect()\fP manual page).
+\fIRead()\fP also takes a descriptor that indicates either a file or a socket.
+\fIWrite()\fP requires a connected socket since no destination is
+specified in the parameters of the system call.
+\fIRead()\fP can be used for either a connected or an unconnected socket.
+These calls are, therefore, quite flexible and may be used to
+write applications that require no assumptions about the source of
+their input or the destination of their output.
+There are variations on \fIread()\fP and \fIwrite()\fP
+that allow the source and destination of the input and output to use
+several separate buffers, while retaining the flexibility to handle
+both files and sockets. These are \fIreadv()\fP and \fIwritev(),\fP
+for read and write \fIvector.\fP
+.pp
+It is sometimes necessary to send high priority data over a
+connection that may have unread low priority data at the
+other end. For example, a user interface process may be interpreting
+commands and sending them on to another process through a stream connection.
+The user interface may have filled the stream with as yet unprocessed
+requests when the user types
+a command to cancel all outstanding requests.
+Rather than have the high priority data wait
+to be processed after the low priority data, it is possible to
+send it as \fIout-of-band\fP
+(OOB) data. The notification of pending OOB data results in the generation of
+a SIGURG signal, if this signal has been enabled (see the manual
+page for \fIsignal\fP or \fIsigvec\fP).
+See [Leffler 1986] for a more complete description of the OOB mechanism.
+There are a pair of calls similar to \fIread\fP and \fIwrite\fP
+that allow options, including sending
+and receiving OOB information; these are \fIsend()\fP
+and \fIrecv().\fP
+These calls are used only with sockets; specifying a descriptor for a file will
+result in the return of an error status. These calls also allow
+\fIpeeking\fP at data in a stream.
+That is, they allow a process to read data without removing the data from
+the stream. One use of this facility is to read ahead in a stream
+to determine the size of the next item to be read.
+When not using these options, these calls have the same functions as
+\fIread()\fP and \fIwrite().\fP
+.pp
+To send datagrams, one must be allowed to specify the destination.
+The call \fIsendto()\fP
+takes a destination address as an argument and is therefore used for
+sending datagrams. The call \fIrecvfrom()\fP
+is often used to read datagrams, since this call returns the address
+of the sender, if it is available, along with the data.
+If the identity of the sender does not matter, one may use \fIread()\fP
+or \fIrecv().\fP
+.pp
+Finally, there are a pair of calls that allow the sending and
+receiving of messages from multiple buffers, when the address of the
+recipient must be specified. These are \fIsendmsg()\fP and
+\fIrecvmsg().\fP
+These calls are actually quite general and have other uses,
+including, in the UNIX domain, the transmission of a file descriptor from one
+process to another.
+.pp
+The various options for reading and writing are shown in Figure 10,
+together with their parameters. The parameters for each system call
+reflect the differences in function of the different calls.
+In the examples given in this paper, the calls \fIread()\fP and
+\fIwrite()\fP have been used whenever possible.
+.(z
+.ft CW
+ /*
+ * The variable descriptor may be the descriptor of either a file
+ * or of a socket.
+ */
+ cc = read(descriptor, buf, nbytes)
+ int cc, descriptor; char *buf; int nbytes;
+
+ /*
+ * An iovec can include several separate buffers.
+ */
+ cc = readv(descriptor, iov, iovcnt)
+ int cc, descriptor; struct iovec *iov; int iovcnt;
+
+ cc = write(descriptor, buf, nbytes)
+ int cc, descriptor; char *buf; int nbytes;
+
+ cc = writev(descriptor, iovec, ioveclen)
+ int cc, descriptor; struct iovec *iovec; int ioveclen;
+
+ /*
+ * The variable ``sock'' must be the descriptor of a socket.
+ * Flags may include MSG_OOB and MSG_PEEK.
+ */
+ cc = send(sock, msg, len, flags)
+ int cc, sock; char *msg; int len, flags;
+
+ cc = sendto(sock, msg, len, flags, to, tolen)
+ int cc, sock; char *msg; int len, flags;
+ struct sockaddr *to; int tolen;
+
+ cc = sendmsg(sock, msg, flags)
+ int cc, sock; struct msghdr msg[]; int flags;
+
+ cc = recv(sock, buf, len, flags)
+ int cc, sock; char *buf; int len, flags;
+
+ cc = recvfrom(sock, buf, len, flags, from, fromlen)
+ int cc, sock; char *buf; int len, flags;
+ struct sockaddr *from; int *fromlen;
+
+ cc = recvmsg(sock, msg, flags)
+ int cc, sock; struct msghdr msg[]; int flags;
+.ft
+.sp 1
+.ce 1
+Figure 10\ \ Varieties of read and write commands
+.)z
+.b
+.sh 1 "Choices"
+.r
+.pp
+This paper has presented examples of some of the forms
+of communication supported by
+Berkeley UNIX 4.4BSD. These have been presented in an order chosen for
+ease of presentation. It is useful to review these options emphasizing the
+factors that make each attractive.
+.pp
+Pipes have the advantage of portability, in that they are supported in all
+UNIX systems. They also are relatively
+simple to use. Socketpairs share this simplicity and have the additional
+advantage of allowing bidirectional communication. The major shortcoming
+of these mechanisms is that they require communicating processes to be
+descendants of a common process. They do not allow intermachine communication.
+.pp
+The two communication domains, UNIX and Internet, allow processes with no common
+ancestor to communicate.
+Of the two, only the Internet domain allows
+communication between machines.
+This makes the Internet domain a necessary
+choice for processes running on separate machines.
+.pp
+The choice between datagrams and stream communication is best made by
+carefully considering the semantic and performance
+requirements of the application.
+Streams can be both advantageous and disadvantageous. One disadvantage
+is that a process is only allowed a limited number of open streams,
+as there are usually only 64 entries available in the open descriptor
+table. This can cause problems if a single server must talk with a large
+number of clients.
+Another is that for delivering a short message the stream setup and
+teardown time can be unnecessarily long. Weighed against this are
+the reliability built into the streams. This will often be the
+deciding factor in favor of streams.
+.b
+.sh 1 "What to do Next"
+.r
+.pp
+Many of the examples presented here can serve as models for multiprocess
+programs and for programs distributed across several machines.
+In developing a new multiprocess program, it is often easiest to
+first write the code to create the processes and communication paths.
+After this code is debugged, the code specific to the application can
+be added.
+.pp
+An introduction to the UNIX system and programming using UNIX system calls
+can be found in [Kernighan and Pike 1984].
+Further documentation of the Berkeley UNIX 4.4BSD IPC mechanisms can be
+found in [Leffler et al. 1986].
+More detailed information about particular calls and protocols
+is provided in sections
+2, 3 and 4 of the
+UNIX Programmer's Manual [CSRG 1986].
+In particular the following manual pages are relevant:
+.(b
+.TS
+l l.
+creating and naming sockets socket(2), bind(2)
+establishing connections listen(2), accept(2), connect(2)
+transferring data read(2), write(2), send(2), recv(2)
+addresses inet(4F)
+protocols tcp(4P), udp(4P).
+.TE
+.)b
+.(b
+.sp
+.b
+Acknowledgements
+.pp
+I would like to thank Sam Leffler and Mike Karels for their help in
+understanding the IPC mechanisms and all the people whose comments
+have helped in writing and improving this report.
+.pp
+This work was sponsored by the Defense Advanced Research Projects Agency
+(DoD), ARPA Order No. 4031, monitored by the Naval Electronics Systems
+Command under contract No. N00039-C-0235.
+The views and conclusions contained in this document are those of the
+author and should not be interpreted as representing official policies,
+either expressed or implied, of the Defense Advanced Research Projects Agency
+or of the US Government.
+.)b
+.(b
+.sp
+.b
+References
+.r
+.sp
+.ls 1
+B.W. Kernighan & R. Pike, 1984,
+.i "The UNIX Programming Environment."
+Englewood Cliffs, N.J.: Prentice-Hall.
+.sp
+.ls 1
+B.W. Kernighan & D.M. Ritchie, 1978,
+.i "The C Programming Language,"
+Englewood Cliffs, N.J.: Prentice-Hall.
+.sp
+.ls 1
+S.J. Leffler, R.S. Fabry, W.N. Joy, P. Lapsley, S. Miller & C. Torek, 1986,
+.i "An Advanced 4.4BSD Interprocess Communication Tutorial."
+Computer Systems Research Group,
+Department of Electrical Engineering and Computer Science,
+University of California, Berkeley.
+.sp
+.ls 1
+Computer Systems Research Group, 1986,
+.i "UNIX Programmer's Manual, 4.4 Berkeley Software Distribution."
+Computer Systems Research Group,
+Department of Electrical Engineering and Computer Science,
+University of California, Berkeley.
+.)b
diff --git a/share/doc/psd/20.ipctut/udgramread.c b/share/doc/psd/20.ipctut/udgramread.c
new file mode 100644
index 0000000..1b6aa3b
--- /dev/null
+++ b/share/doc/psd/20.ipctut/udgramread.c
@@ -0,0 +1,80 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)udgramread.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+/*
+ * In the included file <sys/un.h> a sockaddr_un is defined as follows
+ * struct sockaddr_un {
+ * short sun_family;
+ * char sun_path[108];
+ * };
+ */
+
+#include <stdio.h>
+
+#define NAME "socket"
+
+/*
+ * This program creates a UNIX domain datagram socket, binds the name NAME
+ * to it, reads one datagram, prints it, and removes the name again.
+ */
+main()
+{
+ int sock, length; /* NOTE(review): length is declared but never used */
+ struct sockaddr_un name;
+ char buf[1024];
+
+ /* Create socket from which to read. */
+ sock = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("opening datagram socket");
+ exit(1);
+ }
+ /* Create name: a path in the file system, here the macro NAME. */
+ name.sun_family = AF_UNIX;
+ strcpy(name.sun_path, NAME);
+ if (bind(sock, &name, sizeof(struct sockaddr_un))) {
+ perror("binding name to datagram socket");
+ exit(1);
+ }
+ printf("socket -->%s\en", NAME);
+ /* Read one datagram from the socket; %s below expects it NUL-terminated. */
+ if (read(sock, buf, 1024) < 0)
+ perror("receiving datagram packet");
+ printf("-->%s\en", buf); /* NOTE(review): also reached when read() failed */
+ close(sock);
+ unlink(NAME); /* remove the name that bind() was given above */
+}
diff --git a/share/doc/psd/20.ipctut/udgramsend.c b/share/doc/psd/20.ipctut/udgramsend.c
new file mode 100644
index 0000000..5e2b147
--- /dev/null
+++ b/share/doc/psd/20.ipctut/udgramsend.c
@@ -0,0 +1,68 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)udgramsend.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+
+#define DATA "The sea is calm tonight, the tide is full . . ."
+
+/*
+ * Here I send a datagram to a receiver whose name I get from the command
+ * line arguments. The form of the command line is: udgramsend pathname
+ */
+
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ int sock;
+ struct sockaddr_un name;
+
+ /* Create socket on which to send. */
+ sock = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("opening datagram socket");
+ exit(1);
+ }
+ /* Construct name of socket to send to. */
+ name.sun_family = AF_UNIX;
+ strcpy(name.sun_path, argv[1]); /* NOTE(review): argc is not checked first */
+ /* Send message; sizeof(DATA) counts the string's terminating NUL too. */
+ if (sendto(sock, DATA, sizeof(DATA), 0,
+ &name, sizeof(struct sockaddr_un)) < 0) {
+ perror("sending datagram message");
+ }
+ close(sock);
+}
diff --git a/share/doc/psd/20.ipctut/ustreamread.c b/share/doc/psd/20.ipctut/ustreamread.c
new file mode 100644
index 0000000..9faf5aa
--- /dev/null
+++ b/share/doc/psd/20.ipctut/ustreamread.c
@@ -0,0 +1,96 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)ustreamread.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+
+#define NAME "socket"
+
+/*
+ * This program creates a socket in the UNIX domain and binds a name to it.
+ * After printing the socket's name it begins a loop. Each time through the
+ * loop it accepts a connection and prints out messages from it. When the
+ * connection ends (read() returns 0) or a read fails, the program
+ * accepts a new connection.
+ */
+main()
+{
+ int sock, msgsock, rval;
+ struct sockaddr_un server;
+ char buf[1024];
+
+ /* Create socket */
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ perror("opening stream socket");
+ exit(1);
+ }
+ /* Name socket using file system name */
+ server.sun_family = AF_UNIX;
+ strcpy(server.sun_path, NAME);
+ if (bind(sock, &server, sizeof(struct sockaddr_un))) {
+ perror("binding stream socket");
+ exit(1);
+ }
+ printf("Socket has name %s\en", server.sun_path);
+ /* Start accepting connections; the return value of listen() is not checked */
+ listen(sock, 5);
+ for (;;) {
+ msgsock = accept(sock, 0, 0); /* peer address is not requested */
+ if (msgsock == -1)
+ perror("accept");
+ else do {
+ bzero(buf, sizeof(buf)); /* zero fill so a short read prints as a string */
+ if ((rval = read(msgsock, buf, 1024)) < 0)
+ perror("reading stream message");
+ else if (rval == 0)
+ printf("Ending connection\en");
+ else
+ printf("-->%s\en", buf);
+ } while (rval > 0);
+ close(msgsock); /* NOTE(review): also executed when accept() returned -1 */
+ }
+ /*
+ * The following statements are not executed, because they follow an
+ * infinite loop. However, most ordinary programs will not run
+ * forever. In the UNIX domain it is necessary to tell the file
+ * system that one is through using NAME. In most programs one uses
+ * the call unlink() as below. Since the user will have to kill this
+ * program, it will be necessary to remove the name by a command from
+ * the shell.
+ */
+ close(sock);
+ unlink(NAME);
+}
diff --git a/share/doc/psd/20.ipctut/ustreamwrite.c b/share/doc/psd/20.ipctut/ustreamwrite.c
new file mode 100644
index 0000000..e356e50
--- /dev/null
+++ b/share/doc/psd/20.ipctut/ustreamwrite.c
@@ -0,0 +1,71 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)ustreamwrite.c 8.1 (Berkeley) 6/8/93
+.\"
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+
+#define DATA "Half a league, half a league . . ."
+
+/*
+ * This program connects to the socket named in the command line and sends a
+ * one line message (the macro DATA) to that socket. The command line form is
+ * ustreamwrite pathname
+ */
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ int sock;
+ struct sockaddr_un server;
+ char buf[1024]; /* NOTE(review): buf is declared but never used */
+
+ /* Create socket */
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ perror("opening stream socket");
+ exit(1);
+ }
+ /* Connect socket using name specified by command line. */
+ server.sun_family = AF_UNIX;
+ strcpy(server.sun_path, argv[1]); /* NOTE(review): argc is not checked first */
+
+ if (connect(sock, &server, sizeof(struct sockaddr_un)) < 0) {
+ close(sock);
+ perror("connecting stream socket");
+ exit(1);
+ }
+ if (write(sock, DATA, sizeof(DATA)) < 0) /* sizeof(DATA) includes the NUL */
+ perror("writing on stream socket");
+} /* NOTE(review): sock is not closed explicitly before main() returns */
diff --git a/share/doc/psd/21.ipc/0.t b/share/doc/psd/21.ipc/0.t
new file mode 100644
index 0000000..d28199a
--- /dev/null
+++ b/share/doc/psd/21.ipc/0.t
@@ -0,0 +1,93 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 6/8/93
+.\"
+.EH 'PSD:21-%''Advanced 4.4BSD IPC Tutorial'
+.OH 'Advanced 4.4BSD IPC Tutorial''PSD:21-%'
+.ds lq ``
+.ds rq ''
+.de DT
+.if t .ta .5i 1.25i 2.5i 3.75i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i
+..
+.bd S B 3
+.TL
+An Advanced 4.4BSD Interprocess Communication Tutorial
+.AU
+Samuel J. Leffler
+.AU
+Robert S. Fabry
+.AU
+William N. Joy
+.AU
+Phil Lapsley
+.AI
+Computer Systems Research Group
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+.sp 2
+.AU
+Steve Miller
+.AU
+Chris Torek
+.AI
+Heterogeneous Systems Laboratory
+Department of Computer Science
+University of Maryland, College Park
+College Park, Maryland 20742
+.de IR
+\fI\\$1\fP\\$2
+..
+.de UX
+UNIX\\$1
+..
+.AB
+.PP
+.FS
+* \s-2UNIX\s0 is a trademark of UNIX System Laboratories, Inc.
+in the US and some other countries.
+.FE
+This document provides an introduction to the interprocess
+communication facilities included in the
+4.4BSD release of the
+.UX *
+system.
+.PP
+It discusses the overall model for interprocess communication
+and introduces the interprocess communication primitives
+which have been added to the system. The majority of the
+document considers the use of these primitives in developing
+applications. The reader is expected to be familiar with
+the C programming language as all examples are written in C.
+.AE
diff --git a/share/doc/psd/21.ipc/1.t b/share/doc/psd/21.ipc/1.t
new file mode 100644
index 0000000..f4e48ff
--- /dev/null
+++ b/share/doc/psd/21.ipc/1.t
@@ -0,0 +1,106 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 8.1 (Berkeley) 8/14/93
+.\"
+.\".ds LH "4.4BSD IPC Primer
+.\".ds RH Introduction
+.\".ds RF "Leffler/Fabry/Joy
+.\".ds LF "\*(DY
+.\".ds CF "
+.nr H1 1
+.LP
+.bp
+.LG
+.B
+.ce
+1. INTRODUCTION
+.sp 2
+.R
+.NL
+One of the most important additions to UNIX in 4.2BSD was interprocess
+communication.
+These facilities were the result of
+more than two years of discussion and research. The facilities
+provided in 4.2BSD incorporated many of the ideas from current
+research, while trying to maintain the UNIX philosophy of
+simplicity and conciseness.
+The 4.3BSD release of Berkeley UNIX
+improved upon some of the IPC facilities
+while providing an upward-compatible interface.
+4.4BSD adds support for ISO protocols and IP multicasting.
+The BSD interprocess communication
+facilities have become a de facto standard for UNIX.
+.PP
+UNIX has previously been very weak in the area of interprocess
+communication. Prior to the 4BSD facilities, the only
+standard mechanism which allowed two processes to communicate was
+pipes (the mpx files which were part of Version 7 were
+experimental). Unfortunately, pipes are very restrictive
+in that
+the two communicating processes must be related through a
+common ancestor.
+Further, the semantics of pipes makes them almost impossible
+to maintain in a distributed environment.
+.PP
+Earlier attempts at extending the IPC facilities of UNIX have
+met with mixed reaction. The majority of the problems have
+been related to the fact that these facilities have been tied to
+the UNIX file system, either through naming or implementation.
+Consequently, the IPC facilities provided in 4.2BSD were
+designed as a totally independent subsystem. The BSD IPC
+allows processes to rendezvous in many ways.
+Processes may rendezvous through a UNIX file system-like
+name space (a space where all names are path names)
+as well as through a
+network name space. In fact, new name spaces may
+be added at a future time with only minor changes visible
+to users. Further, the communication facilities
+have been extended to include more than the simple byte stream
+provided by a pipe. These extensions have resulted
+in a completely new part of the system which users will need
+time to familiarize themselves with. It is likely that as
+more use is made of these facilities they will be refined;
+only time will tell.
+.PP
+This document provides a high-level description
+of the IPC facilities in 4.4BSD and their use.
+It is designed to complement the manual pages for the IPC primitives
+by examples of their use.
+The remainder of this document is organized in four sections.
+Section 2 introduces the IPC-related system calls and the basic model
+of communication. Section 3 describes some of the supporting
+library routines users may find useful in constructing distributed
+applications. Section 4 is concerned with the client/server model
+used in developing applications and includes examples of the
+two major types of servers. Section 5 delves into advanced topics
+which sophisticated users are likely to encounter when using
+the IPC facilities.
diff --git a/share/doc/psd/21.ipc/2.t b/share/doc/psd/21.ipc/2.t
new file mode 100644
index 0000000..6f08454
--- /dev/null
+++ b/share/doc/psd/21.ipc/2.t
@@ -0,0 +1,714 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 8.1 (Berkeley) 8/14/93
+.\"
+.\".ds RH "Basics
+.bp
+.nr H1 2
+.nr H2 0
+.\" The next line is a major hack to get around internal changes in the groff
+.\" implementation of .NH.
+.nr nh*hl 1
+.bp
+.LG
+.B
+.ce
+2. BASICS
+.sp 2
+.R
+.NL
+.PP
+The basic building block for communication is the \fIsocket\fP.
+A socket is an endpoint of communication to which a name may
+be \fIbound\fP. Each socket in use has a \fItype\fP
+and one or more associated processes. Sockets exist within
+\fIcommunication domains\fP.
+A communication domain is an
+abstraction introduced to bundle common properties of
+processes communicating through sockets.
+One such property is the scheme used to name sockets. For
+example, in the UNIX communication domain sockets are
+named with UNIX path names; e.g. a
+socket may be named \*(lq/dev/foo\*(rq. Sockets normally
+exchange data only with
+sockets in the same domain (it may be possible to cross domain
+boundaries, but only if some translation process is
+performed). The
+4.4BSD IPC facilities support four separate communication domains:
+the UNIX domain, for on-system communication;
+the Internet domain, which is used by
+processes which communicate
+using the Internet standard communication protocols;
+the NS domain, which is used by processes which
+communicate using the Xerox standard communication
+protocols*;
+.FS
+* See \fIInternet Transport Protocols\fP, Xerox System Integration
+Standard (XSIS)028112 for more information. This document is
+almost a necessity for one trying to write NS applications.
+.FE
+and the ISO OSI protocols, which are not documented in this tutorial.
+The underlying communication
+facilities provided by these domains have a significant influence
+on the internal system implementation as well as the interface to
+socket facilities available to a user. An example of the
+latter is that a socket \*(lqoperating\*(rq in the UNIX domain
+sees a subset of the error conditions which are possible
+when operating in the Internet (or NS) domain.
+.NH 2
+Socket types
+.PP
+Sockets are
+typed according to the communication properties visible to a
+user.
+Processes are presumed to communicate only between sockets of
+the same type, although there is
+nothing that prevents communication between sockets of different
+types should the underlying communication
+protocols support this.
+.PP
+Four types of sockets currently are available to a user.
+A \fIstream\fP socket provides for the bidirectional, reliable,
+sequenced, and unduplicated flow of data without record boundaries.
+Aside from the bidirectionality of data flow, a pair of connected
+stream sockets provides an interface nearly identical to that of pipes\(dg.
+.FS
+\(dg In the UNIX domain, in fact, the semantics are identical and,
+as one might expect, pipes have been implemented internally
+as simply a pair of connected stream sockets.
+.FE
+.PP
+A \fIdatagram\fP socket supports bidirectional flow of data which
+is not promised to be sequenced, reliable, or unduplicated.
+That is, a process
+receiving messages on a datagram socket may find messages duplicated,
+and, possibly,
+in an order different from the order in which they were sent.
+An important characteristic of a datagram
+socket is that record boundaries in data are preserved. Datagram
+sockets closely model the facilities found in many contemporary
+packet switched networks such as the Ethernet.
+.PP
+A \fIraw\fP socket provides users access to
+the underlying communication
+protocols which support socket abstractions.
+These sockets are normally datagram oriented, though their
+exact characteristics are dependent on the interface provided by
+the protocol. Raw sockets are not intended for the general user; they
+have been provided mainly for those interested in developing new
+communication protocols, or for gaining access to some of the more
+esoteric facilities of an existing protocol. The use of raw sockets
+is considered in section 5.
+.PP
+A \fIsequenced packet\fP socket is similar to a stream socket,
+with the exception that record boundaries are preserved. This
+interface is provided only as part of the NS socket abstraction,
+and is very important in most serious NS applications.
+Sequenced-packet sockets allow the user to manipulate the
+SPP or IDP headers on a packet or a group of packets either
+by writing a prototype header along with whatever data is
+to be sent, or by specifying a default header to be used with
+all outgoing data, and allow the user to receive the headers
+on incoming packets. The use of these options is considered in
+section 5.
+.PP
+Another potential socket type which has interesting properties is
+the \fIreliably delivered
+message\fP socket.
+The reliably delivered message socket has
+similar properties to a datagram socket, but with
+reliable delivery. There is currently no support for this
+type of socket, but a reliably delivered message protocol
+similar to Xerox's Packet Exchange Protocol (PEX) may be
+simulated at the user level. More information on this topic
+can be found in section 5.
+.NH 2
+Socket creation
+.PP
+To create a socket the \fIsocket\fP system call is used:
+.DS
+s = socket(domain, type, protocol);
+.DE
+This call requests that the system create a socket in the specified
+\fIdomain\fP and of the specified \fItype\fP. A particular protocol may
+also be requested. If the protocol is left unspecified (a value
+of 0), the system will select an appropriate protocol from those
+protocols which comprise the communication domain and which
+may be used to support the requested socket type. The user is
+returned a descriptor (a small integer number) which may be used
+in later system calls which operate on sockets. The domain is specified as
+one of the manifest constants defined in the file <\fIsys/socket.h\fP>.
+For the UNIX domain the constant is AF_UNIX*; for the Internet
+.FS
+* The manifest constants are named AF_whatever as they indicate
+the ``address format'' to use in interpreting names.
+.FE
+domain AF_INET; and for the NS domain, AF_NS.
+The socket types are also defined in this file
+and one of SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, or SOCK_SEQPACKET
+must be specified.
+To create a stream socket in the Internet domain the following
+call might be used:
+.DS
+s = socket(AF_INET, SOCK_STREAM, 0);
+.DE
+This call would result in a stream socket being created with the TCP
+protocol providing the underlying communication support. To
+create a datagram socket for on-machine use the call might
+be:
+.DS
+s = socket(AF_UNIX, SOCK_DGRAM, 0);
+.DE
+.PP
+The default protocol (used when the \fIprotocol\fP argument to the
+\fIsocket\fP call is 0) should be correct for most every
+situation. However, it is possible to specify a protocol
+other than the default; this will be covered in
+section 5.
+.PP
+There are several reasons a socket call may fail. Aside from
+the rare occurrence of lack of memory (ENOBUFS), a socket
+request may fail due to a request for an unknown protocol
+(EPROTONOSUPPORT), or a request for a type of socket for
+which there is no supporting protocol (EPROTOTYPE).
+.NH 2
+Binding local names
+.PP
+A socket is created without a name. Until a name is bound
+to a socket, processes have no way to reference it and, consequently,
+no messages may be received on it.
+Communicating processes are bound
+by an \fIassociation\fP. In the Internet and NS domains,
+an association
+is composed of local and foreign
+addresses, and local and foreign ports,
+while in the UNIX domain, an association is composed of
+local and foreign path names (the phrase ``foreign pathname''
+means a pathname created by a foreign process, not a pathname
+on a foreign system).
+In most domains, associations must be unique.
+In the Internet domain there
+may never be duplicate <protocol, local address, local port, foreign
+address, foreign port> tuples. UNIX domain sockets need not always
+be bound to a name, but when bound
+there may never be duplicate <protocol, local pathname, foreign
+pathname> tuples.
+The pathnames may not refer to files
+already existing on the system
+in 4.3; the situation may change in future releases.
+.PP
+The \fIbind\fP system call allows a process to specify half of
+an association, <local address, local port>
+(or <local pathname>), while the \fIconnect\fP
+and \fIaccept\fP primitives are used to complete a socket's association.
+.PP
+In the Internet domain,
+binding names to sockets can be fairly complex.
+Fortunately, it is usually not necessary to specifically bind an
+address and port number to a socket, because the
+\fIconnect\fP and \fIsend\fP calls will automatically
+bind an appropriate address if they are used with an
+unbound socket. The process of binding names to NS
+sockets is similar in most ways to that of
+binding names to Internet sockets.
+.PP
+The \fIbind\fP system call is used as follows:
+.DS
+bind(s, name, namelen);
+.DE
+The bound name is a variable length byte string which is interpreted
+by the supporting protocol(s). Its interpretation may vary from
+communication domain to communication domain (this is one of
+the properties which comprise the \*(lqdomain\*(rq).
+As mentioned, in the
+Internet domain names contain an Internet address and port
+number. NS domain names contain an NS address and
+port number. In the UNIX domain, names contain a path name and
+a family, which is always AF_UNIX. If one wanted to bind
+the name \*(lq/tmp/foo\*(rq to a UNIX domain socket, the
+following code would be used*:
+.FS
+* Note that, although the tendency here is to call the \*(lqaddr\*(rq
+structure \*(lqsun\*(rq, doing so would cause problems if the code
+were ever ported to a Sun workstation.
+.FE
+.DS
+#include <sys/un.h>
+ ...
+struct sockaddr_un addr;
+ ...
+strcpy(addr.sun_path, "/tmp/foo");
+addr.sun_family = AF_UNIX;
+bind(s, (struct sockaddr *) &addr, strlen(addr.sun_path) +
+ sizeof (addr.sun_len) + sizeof (addr.sun_family));
+.DE
+Note that in determining the size of a UNIX domain address null
+bytes are not counted, which is why \fIstrlen\fP is used. In
+the current implementation of UNIX domain IPC,
+the file name
+referred to in \fIaddr.sun_path\fP is created as a socket
+in the system file space.
+The caller must, therefore, have
+write permission in the directory where
+\fIaddr.sun_path\fP is to reside, and this file should be deleted by the
+caller when it is no longer needed. Future versions of 4BSD
+may not create this file.
+.PP
+In binding an Internet address things become more
+complicated. The actual call is similar,
+.DS
+#include <sys/types.h>
+#include <netinet/in.h>
+ ...
+struct sockaddr_in sin;
+ ...
+bind(s, (struct sockaddr *) &sin, sizeof (sin));
+.DE
+but the selection of what to place in the address \fIsin\fP
+requires some discussion. We will come back to the problem
+of formulating Internet addresses in section 3 when
+the library routines used in name resolution are discussed.
+.PP
+Binding an NS address to a socket is even more
+difficult,
+especially since the Internet library routines do not
+work with NS hostnames. The actual call is again similar:
+.DS
+#include <sys/types.h>
+#include <netns/ns.h>
+ ...
+struct sockaddr_ns sns;
+ ...
+bind(s, (struct sockaddr *) &sns, sizeof (sns));
+.DE
+Again, discussion of what to place in a \*(lqstruct sockaddr_ns\*(rq
+will be deferred to section 3.
+.NH 2
+Connection establishment
+.PP
+Connection establishment is usually asymmetric,
+with one process a \*(lqclient\*(rq and the other a \*(lqserver\*(rq.
+The server, when willing to offer its advertised services,
+binds a socket to a well-known address associated with the service
+and then passively \*(lqlistens\*(rq on its socket.
+It is then possible for an unrelated process to rendezvous
+with the server.
+The client requests services from the server by initiating a
+\*(lqconnection\*(rq to the server's socket.
+On the client side the \fIconnect\fP call is
+used to initiate a connection. Using the UNIX domain, this
+might appear as,
+.DS
+struct sockaddr_un server;
+ ...
+connect(s, (struct sockaddr *)&server, strlen(server.sun_path) +
+ sizeof (server.sun_len) + sizeof (server.sun_family));
+.DE
+while in the Internet domain,
+.DS
+struct sockaddr_in server;
+ ...
+connect(s, (struct sockaddr *)&server, sizeof (server));
+.DE
+and in the NS domain,
+.DS
+struct sockaddr_ns server;
+ ...
+connect(s, (struct sockaddr *)&server, sizeof (server));
+.DE
+where \fIserver\fP in the example above would contain either the UNIX
+pathname, Internet address and port number, or NS address and
+port number of the server to which the
+client process wishes to speak.
+If the client process's socket is unbound at the time of
+the connect call,
+the system will automatically select and bind a name to
+the socket if necessary; cf. section 5.4.
+This is the usual way that local addresses are bound
+to a socket.
+.PP
+An error is returned if the connection was unsuccessful
+(any name automatically bound by the system, however, remains).
+Otherwise, the socket is associated with the server and
+data transfer may begin. Some of the more common errors returned
+when a connection attempt fails are:
+.IP ETIMEDOUT
+.br
+After failing to establish a connection for a period of time,
+the system decided there was no point in retrying the
+connection attempt any more. This usually occurs because
+the destination host is down, or because problems in
+the network resulted in transmissions being lost.
+.IP ECONNREFUSED
+.br
+The host refused service for some reason.
+This is usually
+due to a server process
+not being present at the requested name.
+.IP "ENETDOWN or EHOSTDOWN"
+.br
+These operational errors are
+returned based on status information delivered to
+the client host by the underlying communication services.
+.IP "ENETUNREACH or EHOSTUNREACH"
+.br
+These operational errors can occur either because the network
+or host is unknown (no route to the network or host is present),
+or because of status information returned by intermediate
+gateways or switching nodes. Many times the status returned
+is not sufficient to distinguish a network being down from a
+host being down, in which case the system
+indicates the entire network is unreachable.
+.PP
+For the server to receive a client's connection it must perform
+two steps after binding its socket.
+The first is to indicate a willingness to listen for
+incoming connection requests:
+.DS
+listen(s, 5);
+.DE
+The second parameter to the \fIlisten\fP call specifies the maximum
+number of outstanding connections which may be queued awaiting
+acceptance by the server process; this number
+may be limited by the system. Should a connection be
+requested while the queue is full, the connection will not be
+refused, but rather the individual messages which comprise the
+request will be ignored. This gives a harried server time to
+make room in its pending connection queue while the client
+retries the connection request. Had the connection been returned
+with the ECONNREFUSED error, the client would be unable to tell
+if the server was up or not. As it is now it is still possible
+to get the ETIMEDOUT error back, though this is unlikely. The
+backlog figure supplied with the listen call is currently limited
+by the system to a maximum of 5 pending connections on any
+one queue. This avoids the problem of processes hogging system
+resources by setting an infinite backlog, then ignoring
+all connection requests.
+.PP
+With a socket marked as listening, a server may \fIaccept\fP
+a connection:
+.DS
+struct sockaddr_in from;
+ ...
+fromlen = sizeof (from);
+newsock = accept(s, (struct sockaddr *)&from, &fromlen);
+.DE
+(For the UNIX domain, \fIfrom\fP would be declared as a
+\fIstruct sockaddr_un\fP, and for the NS domain, \fIfrom\fP
+would be declared as a \fIstruct sockaddr_ns\fP,
+but nothing different would need
+to be done as far as \fIfromlen\fP is concerned. In the examples
+which follow, only Internet routines will be discussed.) A new
+descriptor is returned on receipt of a connection (along with
+a new socket). If the server wishes to find out who its client is,
+it may supply a buffer for the client socket's name. The value-result
+parameter \fIfromlen\fP is initialized by the server to indicate how
+much space is associated with \fIfrom\fP, then modified on return
+to reflect the true size of the name. If the client's name is not
+of interest, the second parameter may be a null pointer.
+.PP
+\fIAccept\fP normally blocks. That is, \fIaccept\fP
+will not return until a connection is available or the system call
+is interrupted by a signal to the process. Further, there is no
+way for a process to indicate it will accept connections from only
+a specific individual, or individuals. It is up to the user process
+to consider who the connection is from and close down the connection
+if it does not wish to speak to the process. If the server process
+wants to accept connections on more than one socket, or wants to avoid blocking
+on the accept call, there are alternatives; they will be considered
+in section 5.
+.NH 2
+Data transfer
+.PP
+With a connection established, data may begin to flow. To send
+and receive data there are a number of possible calls.
+With the peer entity at each end of a connection
+anchored, a user can send or receive a message without specifying
+the peer. As one might expect, in this case
+the normal \fIread\fP and \fIwrite\fP system calls are usable,
+.DS
+write(s, buf, sizeof (buf));
+read(s, buf, sizeof (buf));
+.DE
+In addition to \fIread\fP and \fIwrite\fP,
+the new calls \fIsend\fP and \fIrecv\fP
+may be used:
+.DS
+send(s, buf, sizeof (buf), flags);
+recv(s, buf, sizeof (buf), flags);
+.DE
+While \fIsend\fP and \fIrecv\fP are virtually identical to
+\fIwrite\fP and \fIread\fP,
+the extra \fIflags\fP argument is important. The flags,
+defined in \fI<sys/socket.h>\fP, may be
+specified as a non-zero value if one or more
+of the following is required:
+.DS
+.TS
+l l.
+MSG_OOB send/receive out of band data
+MSG_PEEK look at data without reading
+MSG_DONTROUTE send data without routing packets
+.TE
+.DE
+Out of band data is a notion specific to stream sockets, and one
+which we will not immediately consider. The option to have data
+sent without routing applied to the outgoing packets is currently
+used only by the routing table management process, and is
+unlikely to be of interest to the casual user. The ability
+to preview data is, however, of interest. When MSG_PEEK
+is specified with a \fIrecv\fP call, any data present is returned
+to the user, but treated as still \*(lqunread\*(rq. That
+is, the next \fIread\fP or \fIrecv\fP call applied to the socket will
+return the data previously previewed.
+.NH 2
+Discarding sockets
+.PP
+Once a socket is no longer of interest, it may be discarded
+by applying a \fIclose\fP to the descriptor,
+.DS
+close(s);
+.DE
+If data is associated with a socket which promises reliable delivery
+(e.g. a stream socket) when a close takes place, the system will
+continue to attempt to transfer the data.
+However, after a fairly long period of
+time, if the data is still undelivered, it will be discarded.
+Should a user have no use for any pending data, it may
+perform a \fIshutdown\fP on the socket prior to closing it.
+This call is of the form:
+.DS
+shutdown(s, how);
+.DE
+where \fIhow\fP is 0 if the user is no longer interested in reading
+data, 1 if no more data will be sent, or 2 if no data is to
+be sent or received.
+.NH 2
+Connectionless sockets
+.PP
+To this point we have been concerned mostly with sockets which
+follow a connection oriented model. However, there is also
+support for connectionless interactions typical of the datagram
+facilities found in contemporary packet switched networks.
+A datagram socket provides a symmetric interface to data
+exchange. While processes are still likely to be client
+and server, there is no requirement for connection establishment.
+Instead, each message includes the destination address.
+.PP
+Datagram sockets are created as before.
+If a particular local address is needed,
+the \fIbind\fP operation must precede the first data transmission.
+Otherwise, the system will set the local address and/or port
+when data is first sent.
+To send data, the \fIsendto\fP primitive is used,
+.DS
+sendto(s, buf, buflen, flags, (struct sockaddr *)&to, tolen);
+.DE
+The \fIs\fP, \fIbuf\fP, \fIbuflen\fP, and \fIflags\fP
+parameters are used as before.
+The \fIto\fP and \fItolen\fP
+values are used to indicate the address of the intended recipient of the
+message. When
+using an unreliable datagram interface, it is
+unlikely that any errors will be reported to the sender. When
+information is present locally to recognize a message that can
+not be delivered (for instance when a network is unreachable),
+the call will return \-1 and the global value \fIerrno\fP will
+contain an error number.
+.PP
+To receive messages on an unconnected datagram socket, the
+\fIrecvfrom\fP primitive is provided:
+.DS
+recvfrom(s, buf, buflen, flags, (struct sockaddr *)&from, &fromlen);
+.DE
+Once again, the \fIfromlen\fP parameter is handled in
+a value-result fashion, initially containing the size of
+the \fIfrom\fP buffer, and modified on return to indicate
+the actual size of the address from which the datagram was received.
+.PP
+In addition to the two calls mentioned above, datagram
+sockets may also use the \fIconnect\fP call to associate
+a socket with a specific destination address. In this case, any
+data sent on the socket will automatically be addressed
+to the connected peer, and only data received from that
+peer will be delivered to the user. Only one connected
+address is permitted for each socket at one time;
+a second connect will change the destination address,
+and a connect to a null address (family AF_UNSPEC)
+will disconnect.
+Connect requests on datagram sockets return immediately,
+as this simply results in the system recording
+the peer's address (as compared to a stream socket, where a
+connect request initiates establishment of an end to end
+connection). \fIAccept\fP and \fIlisten\fP are not
+used with datagram sockets.
+.PP
+While a datagram socket is connected,
+errors from recent \fIsend\fP calls may be returned
+asynchronously.
+These errors may be reported on subsequent operations
+on the socket,
+or a special socket option used with \fIgetsockopt\fP, SO_ERROR,
+may be used to interrogate the error status.
+A \fIselect\fP for reading or writing will return true
+when an error indication has been received.
+The next operation will return the error, and the error status is cleared.
+Other, less
+important details of datagram sockets are described
+in section 5.
+.NH 2
+Input/Output multiplexing
+.PP
+One last facility often used in developing applications
+is the ability to multiplex i/o requests among multiple
+sockets and/or files. This is done using the \fIselect\fP
+call:
+.DS
+#include <sys/time.h>
+#include <sys/types.h>
+ ...
+
+fd_set readmask, writemask, exceptmask;
+struct timeval timeout;
+ ...
+select(nfds, &readmask, &writemask, &exceptmask, &timeout);
+.DE
+\fISelect\fP takes as arguments pointers to three sets, one for
+the set of file descriptors from which the caller wishes to
+be able to read data, one for those descriptors to which
+data is to be written, and one for which exceptional conditions
+are pending; out-of-band data is the only
+exceptional condition currently implemented by the socket abstraction.
+If the user is not interested
+in certain conditions (i.e., read, write, or exceptions),
+the corresponding argument to the \fIselect\fP should
+be a null pointer.
+.PP
+Each set is actually a structure containing an array of
+long integer bit masks; the size of the array is set
+by the definition FD_SETSIZE.
+The array will be
+long enough to hold one bit for each of FD_SETSIZE file descriptors.
+.PP
+The macros FD_SET(\fIfd, &mask\fP) and
+FD_CLR(\fIfd, &mask\fP)
+have been provided for adding and removing file descriptor
+\fIfd\fP in the set \fImask\fP. The
+set should be zeroed before use, and
+the macro FD_ZERO(\fI&mask\fP) has been provided
+to clear the set \fImask\fP.
+The parameter \fInfds\fP in the \fIselect\fP call specifies the range
+of file descriptors (i.e. one plus the value of the largest
+descriptor) to be examined in a set.
+.PP
+A timeout value may be specified if the selection
+is not to last more than a predetermined period of time. If
+the fields in \fItimeout\fP are set to 0, the selection takes
+the form of a
+\fIpoll\fP, returning immediately. If the last parameter is
+a null pointer, the selection will block indefinitely*.
+.FS
+* To be more specific, a return takes place only when a
+descriptor is selectable, or when a signal is received by
+the caller, interrupting the system call.
+.FE
+\fISelect\fP normally returns the number of file descriptors selected;
+if the \fIselect\fP call returns due to the timeout expiring, then
+the value 0 is returned.
+If the \fIselect\fP terminates because of an error or interruption,
+a \-1 is returned with the error number in \fIerrno\fP,
+and with the file descriptor masks unchanged.
+.PP
+Assuming a successful return, the three sets will
+indicate which
+file descriptors are ready to be read from, written to, or
+have exceptional conditions pending.
+The status of a file descriptor in a select mask may be
+tested with the \fIFD_ISSET(fd, &mask)\fP macro, which
+returns a non-zero value if \fIfd\fP is a member of the set
+\fImask\fP, and 0 if it is not.
+.PP
+To determine if there are connections waiting
+on a socket to be used with an \fIaccept\fP call,
+\fIselect\fP can be used, followed by
+a \fIFD_ISSET(fd, &mask)\fP macro to check for read
+readiness on the appropriate socket. If \fIFD_ISSET\fP
+returns a non-zero value, indicating permission to read, then a
+connection is pending on the socket.
+.PP
+As an example, to read data from two sockets, \fIs1\fP and
+\fIs2\fP as it is available from each and with a one-second
+timeout, the following code
+might be used:
+.DS
+#include <sys/time.h>
+#include <sys/types.h>
+ ...
+fd_set read_template;
+struct timeval wait;
+ ...
+for (;;) {
+ wait.tv_sec = 1; /* one second */
+ wait.tv_usec = 0;
+
+ FD_ZERO(&read_template);
+
+ FD_SET(s1, &read_template);
+ FD_SET(s2, &read_template);
+
+ nb = select(FD_SETSIZE, &read_template, (fd_set *) 0, (fd_set *) 0, &wait);
+ if (nb <= 0) {
+ \fIAn error occurred during the \fPselect\fI, or
+ the \fPselect\fI timed out.\fP
+ }
+
+ if (FD_ISSET(s1, &read_template)) {
+ \fISocket #1 is ready to be read from.\fP
+ }
+
+ if (FD_ISSET(s2, &read_template)) {
+ \fISocket #2 is ready to be read from.\fP
+ }
+}
+.DE
+.PP
+In 4.2, the arguments to \fIselect\fP were pointers to integers
+instead of pointers to \fIfd_set\fPs. This type of call
+will still work as long as the number of file descriptors
+being examined is less than the number of bits in an
+integer; however, the methods illustrated above should
+be used in all current programs.
+.PP
+\fISelect\fP provides a synchronous multiplexing scheme.
+Asynchronous notification of output completion, input availability,
+and exceptional conditions is possible through use of the
+SIGIO and SIGURG signals described in section 5.
diff --git a/share/doc/psd/21.ipc/3.t b/share/doc/psd/21.ipc/3.t
new file mode 100644
index 0000000..0d429cf
--- /dev/null
+++ b/share/doc/psd/21.ipc/3.t
@@ -0,0 +1,409 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 8.1 (Berkeley) 6/8/93
+.\"
+.\".ds RH "Network Library Routines
+.bp
+.nr H1 3
+.nr H2 0
+.bp
+.LG
+.B
+.ce
+3. NETWORK LIBRARY ROUTINES
+.sp 2
+.R
+.NL
+.PP
+The discussion in section 2 indicated the possible need to
+locate and construct network addresses when using the
+interprocess communication facilities in a distributed
+environment. To aid in this task a number of routines
+have been added to the standard C run-time library.
+In this section we will consider the new routines provided
+to manipulate network addresses. While the 4.4BSD networking
+facilities support the Internet protocols
+and the Xerox NS protocols,
+most of the routines presented
+in this section do not apply to the NS domain. Unless otherwise
+stated, it should be assumed that the routines presented in this
+section do not apply to the NS domain.
+.PP
+Locating a service on a remote host requires many levels of
+mapping before client and server may
+communicate. A service is assigned a name which is intended
+for human consumption; e.g. \*(lqthe \fIlogin server\fP on host
+monet\*(rq.
+This name, and the name of the peer host, must then be translated
+into network \fIaddresses\fP which are not necessarily suitable
+for human consumption. Finally, the address must then be used in locating
+a physical \fIlocation\fP and \fIroute\fP to the service. The
+specifics of these three mappings are likely to vary between
+network architectures. For instance, it is desirable for a network
+to not require hosts to
+be named in such a way that their physical location is known by
+the client host. Instead, underlying services in the network
+may discover the actual location of the host at the time a client
+host wishes to communicate. This ability to have hosts named in
+a location independent manner may induce overhead in connection
+establishment, as a discovery process must take place,
+but allows a host to be physically mobile without requiring it to
+notify its clientele of its current location.
+.PP
+Standard routines are provided for: mapping host names
+to network addresses, network names to network numbers,
+protocol names to protocol numbers, and service names
+to port numbers and the appropriate protocol to
+use in communicating with the server process. The
+file <\fInetdb.h\fP> must be included when using any of these
+routines.
+.NH 2
+Host names
+.PP
+An Internet host name to address mapping is represented by
+the \fIhostent\fP structure:
+.DS
+.if t .ta 0.6i 1.1i 2.6i
+struct hostent {
+ char *h_name; /* official name of host */
+ char **h_aliases; /* alias list */
+ int h_addrtype; /* host address type (e.g., AF_INET) */
+ int h_length; /* length of address */
+ char **h_addr_list; /* list of addresses, null terminated */
+};
+
+#define h_addr h_addr_list[0] /* first address, network byte order */
+.DE
+The routine \fIgethostbyname\fP(3N) takes an Internet host name
+and returns a \fIhostent\fP structure,
+while the routine \fIgethostbyaddr\fP(3N)
+maps Internet host addresses into a \fIhostent\fP structure.
+.PP
+The official name of the host and its public aliases are
+returned by these routines,
+along with the address type (family) and a null terminated list of
+variable length addresses. This list of addresses is
+required because it is possible
+for a host to have many addresses, all having the same name.
+The \fIh_addr\fP definition is provided for backward compatibility,
+and is defined to be the first address in the list of addresses
+in the \fIhostent\fP structure.
+.PP
+The database for these calls is provided either by the
+file \fI/etc/hosts\fP (\fIhosts\fP\|(5)),
+or by use of a nameserver, \fInamed\fP\|(8).
+Because of the differences in these databases and their access protocols,
+the information returned may differ.
+When using the host table version of \fIgethostbyname\fP,
+only one address will be returned, but all listed aliases will be included.
+The nameserver version may return alternate addresses,
+but will not provide any aliases other than the one given as an argument.
+.PP
+Unlike Internet names, NS names are always mapped into host
+addresses by the use of a standard NS \fIClearinghouse service\fP,
+a distributed name and authentication server. The algorithms
+for mapping NS names to addresses via a Clearinghouse are
+rather complicated, and the routines are not part of the
+standard libraries. The user-contributed Courier (Xerox
+remote procedure call protocol) compiler contains routines
+to accomplish this mapping; see the documentation and
+examples provided therein for more information. It is
+expected that almost all software that has to communicate
+using NS will need to use the facilities of
+the Courier compiler.
+.PP
+An NS host address is represented by the following:
+.DS
+union ns_host {
+ u_char c_host[6];
+ u_short s_host[3];
+};
+
+union ns_net {
+ u_char c_net[4];
+ u_short s_net[2];
+};
+
+struct ns_addr {
+ union ns_net x_net;
+ union ns_host x_host;
+ u_short x_port;
+};
+.DE
+The following code fragment inserts a known NS address into
+an \fIns_addr\fP:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+ ...
+u_long netnum;
+struct sockaddr_ns dst;
+ ...
+bzero((char *)&dst, sizeof(dst));
+
+/*
+ * There is no convenient way to assign a long
+ * integer to a ``union ns_net'' at present; in
+ * the future, something will hopefully be provided,
+ * but this is the portable way to go for now.
+ * The network number below is the one for the NS net
+ * that the desired host (gyre) is on.
+ */
+netnum = htonl(2266);
+dst.sns_addr.x_net = *(union ns_net *) &netnum;
+dst.sns_family = AF_NS;
+
+/*
+ * host 2.7.1.0.2a.18 == "gyre:Computer Science:UofMaryland"
+ */
+dst.sns_addr.x_host.c_host[0] = 0x02;
+dst.sns_addr.x_host.c_host[1] = 0x07;
+dst.sns_addr.x_host.c_host[2] = 0x01;
+dst.sns_addr.x_host.c_host[3] = 0x00;
+dst.sns_addr.x_host.c_host[4] = 0x2a;
+dst.sns_addr.x_host.c_host[5] = 0x18;
+dst.sns_addr.x_port = htons(75);
+.DE
+.NH 2
+Network names
+.PP
+As for host names, routines for mapping network names to numbers,
+and back, are provided. These routines return a \fInetent\fP
+structure:
+.DS
+.DT
+/*
+ * Assumption here is that a network number
+ * fits in 32 bits -- probably a poor one.
+ */
+struct netent {
+ char *n_name; /* official name of net */
+ char **n_aliases; /* alias list */
+ int n_addrtype; /* net address type */
+ int n_net; /* network number, host byte order */
+};
+.DE
+The routines \fIgetnetbyname\fP(3N), \fIgetnetbynumber\fP(3N),
+and \fIgetnetent\fP(3N) are the network counterparts to the
+host routines described above. The routines extract their
+information from \fI/etc/networks\fP.
+.PP
+NS network numbers are determined either by asking your local
+Xerox Network Administrator (and hardcoding the information
+into your code), or by querying the Clearinghouse for addresses.
+The internetwork router is the only process
+that needs to manipulate network numbers on a regular basis; if
+a process wishes to communicate with a machine, it should ask the
+Clearinghouse for that machine's address (which will include
+the net number).
+.NH 2
+Protocol names
+.PP
+For protocols, which are defined in \fI/etc/protocols\fP,
+the \fIprotoent\fP structure defines the
+protocol-name mapping
+used with the routines \fIgetprotobyname\fP(3N),
+\fIgetprotobynumber\fP(3N),
+and \fIgetprotoent\fP(3N):
+.DS
+.DT
+struct protoent {
+ char *p_name; /* official protocol name */
+ char **p_aliases; /* alias list */
+ int p_proto; /* protocol number */
+};
+.DE
+.PP
+In the NS domain, protocols are indicated by the "client type"
+field of an IDP header. No protocol database exists; see section
+5 for more information.
+.NH 2
+Service names
+.PP
+Information regarding services is a bit more complicated. A service
+is expected to reside at a specific \*(lqport\*(rq and employ
+a particular communication protocol. This view is consistent with
+the Internet domain, but inconsistent with other network architectures.
+Further, a service may reside on multiple ports.
+If this occurs, the higher level library routines
+will have to be bypassed or extended.
+Services available are contained in the file \fI/etc/services\fP.
+A service mapping is described by the \fIservent\fP structure,
+.DS
+.DT
+struct servent {
+ char *s_name; /* official service name */
+ char **s_aliases; /* alias list */
+ int s_port; /* port number, network byte order */
+ char *s_proto; /* protocol to use */
+};
+.DE
+The routine \fIgetservbyname\fP(3N) maps service
+names to a servent structure by specifying a service name and,
+optionally, a qualifying protocol. Thus the call
+.DS
+sp = getservbyname("telnet", (char *) 0);
+.DE
+returns the service specification for a telnet server using
+any protocol, while the call
+.DS
+sp = getservbyname("telnet", "tcp");
+.DE
+returns only that telnet server which uses the TCP protocol.
+The routines \fIgetservbyport\fP(3N) and \fIgetservent\fP(3N) are
+also provided. The \fIgetservbyport\fP routine has an interface similar
+to that provided by \fIgetservbyname\fP; an optional protocol name may
+be specified to qualify lookups.
+.PP
+In the NS domain, services are handled by a central dispatcher
+provided as part of the Courier remote procedure call facilities.
+Again, the reader is referred to the Courier compiler documentation
+and to the Xerox standard*
+.FS
+* \fICourier: The Remote Procedure Call Protocol\fP, XSIS 038112.
+.FE
+for further details.
+.NH 2
+Miscellaneous
+.PP
+With the support routines described above, an Internet application program
+should rarely have to deal directly
+with addresses. This allows
+services to be developed as much as possible in a network independent
+fashion. It is clear, however, that purging all network dependencies
+is very difficult. So long as the user is required to supply network
+addresses when naming services and sockets there will always be some
+network dependency in a program. For example, the normal
+code included in client programs, such as the remote login program,
+is of the form shown in Figure 1.
+(This example will be considered in more detail in section 4.)
+.PP
+If we wanted to make the remote login program independent of the
+Internet protocols and addressing scheme we would be forced to add
+a layer of routines which masked the network dependent aspects from
+the mainstream login code. For the current facilities available in
+the system this does not appear to be worthwhile.
+.PP
+Aside from the address-related data base routines, there are several
+other routines available in the run-time library which are of interest
+to users. These are intended mostly to simplify manipulation of
+names and addresses. Table 1 summarizes the routines
+for manipulating variable length byte strings and handling byte
+swapping of network addresses and values.
+.KF
+.DS B
+.TS
+box;
+l | l
+l | l.
+Call Synopsis
+_
+bcmp(s1, s2, n) compare byte-strings; 0 if same, not 0 otherwise
+bcopy(s1, s2, n) copy n bytes from s1 to s2
+bzero(base, n) zero-fill n bytes starting at base
+htonl(val) convert 32-bit quantity from host to network byte order
+htons(val) convert 16-bit quantity from host to network byte order
+ntohl(val) convert 32-bit quantity from network to host byte order
+ntohs(val) convert 16-bit quantity from network to host byte order
+.TE
+.DE
+.ce
+Table 1. C run-time routines.
+.KE
+.PP
+The byte swapping routines are provided because the operating
+system expects addresses to be supplied in network order (aka ``big-endian'' order). On
+``little-endian'' architectures, such as Intel x86 and VAX,
+host byte ordering is different than
+network byte ordering. Consequently,
+programs are sometimes required to byte swap quantities. The
+library routines which return network addresses provide them
+in network order so that they may simply be copied into the structures
+provided to the system. This implies users should encounter the
+byte swapping problem only when \fIinterpreting\fP network addresses.
+For example, if an Internet port is to be printed out the following
+code would be required:
+.DS
+printf("port number %d\en", ntohs(sp->s_port));
+.DE
+On machines where byte swapping is unneeded, these routines are
+defined as null macros.
+.DS
+.if t .ta .5i 1.0i 1.5i 2.0i
+.if n .ta .7i 1.4i 2.1i 2.8i
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <netdb.h>
+ ...
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct sockaddr_in server;
+ struct servent *sp;
+ struct hostent *hp;
+ int s;
+ ...
+ sp = getservbyname("login", "tcp");
+ if (sp == NULL) {
+ fprintf(stderr, "rlogin: tcp/login: unknown service\en");
+ exit(1);
+ }
+ hp = gethostbyname(argv[1]);
+ if (hp == NULL) {
+ fprintf(stderr, "rlogin: %s: unknown host\en", argv[1]);
+ exit(2);
+ }
+ bzero((char *)&server, sizeof (server));
+ bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
+ server.sin_family = hp->h_addrtype;
+ server.sin_port = sp->s_port;
+ s = socket(AF_INET, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("rlogin: socket");
+ exit(3);
+ }
+ ...
+ /* Connect does the bind() for us */
+
+ if (connect(s, (struct sockaddr *)&server, sizeof (server)) < 0) {
+ perror("rlogin: connect");
+ exit(5);
+ }
+ ...
+}
+.DE
+.ce
+Figure 1. Remote login client code.
diff --git a/share/doc/psd/21.ipc/4.t b/share/doc/psd/21.ipc/4.t
new file mode 100644
index 0000000..e75af14
--- /dev/null
+++ b/share/doc/psd/21.ipc/4.t
@@ -0,0 +1,514 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 8.1 (Berkeley) 6/8/93
+.\"
+.\".ds RH "Client/Server Model
+.bp
+.nr H1 4
+.nr H2 0
+.sp 8i
+.bp
+.LG
+.B
+.ce
+4. CLIENT/SERVER MODEL
+.sp 2
+.R
+.NL
+.PP
+The most commonly used paradigm in constructing distributed applications
+is the client/server model. In this scheme client applications request
+services from a server process. This implies an asymmetry in establishing
+communication between the client and server which has been examined
+in section 2. In this section we will look more closely at the interactions
+between client and server, and consider some of the problems in developing
+client and server applications.
+.PP
+The client and server require a well known set of conventions before
+service may be rendered (and accepted). This set of conventions
+comprises a protocol which must be implemented at both ends of a
+connection. Depending on the situation, the protocol may be symmetric
+or asymmetric. In a symmetric protocol, either side may play the
+master or slave roles. In an asymmetric protocol, one side is
+immutably recognized as the master, with the other as the slave.
+An example of a symmetric protocol is the TELNET protocol used in
+the Internet for remote terminal emulation. An example
+of an asymmetric protocol is the Internet file transfer protocol,
+FTP. No matter whether the specific protocol used in obtaining
+a service is symmetric or asymmetric, when accessing a service there
+is a \*(lqclient process\*(rq and a \*(lqserver process\*(rq. We
+will first consider the properties of server processes, then
+client processes.
+.PP
+A server process normally listens at a well known address for
+service requests. That is, the server process remains dormant
+until a connection is requested by a client's connection
+to the server's address. At such a time
+the server process ``wakes up'' and services the client,
+performing whatever appropriate actions the client requests of it.
+.PP
+Alternative schemes which use a service server
+may be used to eliminate a flock of server processes clogging the
+system while remaining dormant most of the time. For Internet
+servers in 4.4BSD,
+this scheme has been implemented via \fIinetd\fP, the so called
+``internet super-server.'' \fIInetd\fP listens at a variety
+of ports, determined at start-up by reading a configuration file.
+When a connection is requested to a port on which \fIinetd\fP is
+listening, \fIinetd\fP executes the appropriate server program to handle the
+client. With this method, clients are unaware that an
+intermediary such as \fIinetd\fP has played any part in the
+connection. \fIInetd\fP will be described in more detail in
+section 5.
+.PP
+A similar alternative scheme is used by most Xerox services. In general,
+the Courier dispatch process (if used) accepts connections from
+processes requesting services of some sort or another. The client
+processes request a particular <program number, version number, procedure
+number> triple. If the dispatcher knows of such a program, it is
+started to handle the request; if not, an error is reported to the
+client. In this way, only one port is required to service a large
+variety of different requests. Again, the Courier facilities are
+not available without the use and installation of the Courier
+compiler. The information presented in this section applies only
+to NS clients and services that do not use Courier.
+.NH 2
+Servers
+.PP
+In 4.4BSD most servers are accessed at well known Internet addresses
+or UNIX domain names. For
+example, the remote login server's main loop is of the form shown
+in Figure 2.
+.KF
+.if t .ta .5i 1.0i 1.5i 2.0i 2.5i 3.0i 3.5i
+.if n .ta .7i 1.4i 2.1i 2.8i 3.5i 4.2i 4.9i
+.sp 0.5i
+.DS
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ int f;
+ struct sockaddr_in from;
+ struct servent *sp;
+
+ sp = getservbyname("login", "tcp");
+ if (sp == NULL) {
+ fprintf(stderr, "rlogind: tcp/login: unknown service\en");
+ exit(1);
+ }
+ ...
+#ifndef DEBUG
+ /* Disassociate server from controlling terminal */
+ ...
+#endif
+
+ sin.sin_port = sp->s_port; /* Restricted port -- see section 5 */
+ ...
+ f = socket(AF_INET, SOCK_STREAM, 0);
+ ...
+ if (bind(f, (struct sockaddr *) &sin, sizeof (sin)) < 0) {
+ ...
+ }
+ ...
+ listen(f, 5);
+ for (;;) {
+ int g, len = sizeof (from);
+
+ g = accept(f, (struct sockaddr *) &from, &len);
+ if (g < 0) {
+ if (errno != EINTR)
+ syslog(LOG_ERR, "rlogind: accept: %m");
+ continue;
+ }
+ if (fork() == 0) {
+ close(f);
+ doit(g, &from);
+ }
+ close(g);
+ }
+}
+.DE
+.ce
+Figure 2. Remote login server.
+.sp 0.5i
+.KE
+.PP
+The first step taken by the server is to look up its service
+definition:
+.sp 1
+.nf
+.in +5
+.if t .ta .5i 1.0i 1.5i 2.0i
+.if n .ta .7i 1.4i 2.1i 2.8i
+sp = getservbyname("login", "tcp");
+if (sp == NULL) {
+ fprintf(stderr, "rlogind: tcp/login: unknown service\en");
+ exit(1);
+}
+.sp 1
+.in -5
+.fi
+The result of the \fIgetservbyname\fP call
+is used in later portions of the code to
+define the Internet port at which it listens for service
+requests (indicated by a connection).
+.KS
+.PP
+Step two is to disassociate the server from the controlling
+terminal of its invoker:
+.DS
+ for (i = 0; i < 3; ++i)
+ close(i);
+
+ open("/", O_RDONLY);
+ dup2(0, 1);
+ dup2(0, 2);
+
+ i = open("/dev/tty", O_RDWR);
+ if (i >= 0) {
+ ioctl(i, TIOCNOTTY, 0);
+ close(i);
+ }
+.DE
+.KE
+This step is important as the server will
+likely not want to receive signals delivered to the process
+group of the controlling terminal. Note, however, that
+once a server has disassociated itself it can no longer
+send reports of errors to a terminal, and must log errors
+via \fIsyslog\fP.
+.PP
+Once a server has established a pristine environment, it
+creates a socket and begins accepting service requests.
+The \fIbind\fP call is required to insure the server listens
+at its expected location. It should be noted that the
+remote login server listens at a restricted port number, and must
+therefore be run
+with a user-id of root.
+This concept of a ``restricted port number'' is 4BSD
+specific, and is covered in section 5.
+.PP
+The main body of the loop is fairly simple:
+.DS
+.if t .ta .5i 1.0i 1.5i 2.0i
+.if n .ta .7i 1.4i 2.1i 2.8i
+for (;;) {
+ int g, len = sizeof (from);
+
+ g = accept(f, (struct sockaddr *)&from, &len);
+ if (g < 0) {
+ if (errno != EINTR)
+ syslog(LOG_ERR, "rlogind: accept: %m");
+ continue;
+ }
+ if (fork() == 0) { /* Child */
+ close(f);
+ doit(g, &from);
+ }
+ close(g); /* Parent */
+}
+.DE
+An \fIaccept\fP call blocks the server until
+a client requests service. This call could return a
+failure status if the call is interrupted by a signal
+such as SIGCHLD (to be discussed in section 5). Therefore,
+the return value from \fIaccept\fP is checked to insure
+a connection has actually been established, and
+an error report is logged via \fIsyslog\fP if an error
+has occurred.
+.PP
+With a connection
+in hand, the server then forks a child process and invokes
+the main body of the remote login protocol processing. Note
+how the socket used by the parent for queuing connection
+requests is closed in the child, while the socket created as
+a result of the \fIaccept\fP is closed in the parent. The
+address of the client is also handed to the \fIdoit\fP routine
+because it requires it in authenticating clients.
+.NH 2
+Clients
+.PP
+The client side of the remote login service was shown
+earlier in Figure 1.
+One can see the separate, asymmetric roles of the client
+and server clearly in the code. The server is a passive entity,
+listening for client connections, while the client process is
+an active entity, initiating a connection when invoked.
+.PP
+Let us consider more closely the steps taken
+by the client remote login process. As in the server process,
+the first step is to locate the service definition for a remote
+login:
+.DS
+sp = getservbyname("login", "tcp");
+if (sp == NULL) {
+ fprintf(stderr, "rlogin: tcp/login: unknown service\en");
+ exit(1);
+}
+.DE
+Next the destination host is looked up with a
+\fIgethostbyname\fP call:
+.DS
+hp = gethostbyname(argv[1]);
+if (hp == NULL) {
+ fprintf(stderr, "rlogin: %s: unknown host\en", argv[1]);
+ exit(2);
+}
+.DE
+With this accomplished, all that is required is to establish a
+connection to the server at the requested host and start up the
+remote login protocol. The address buffer is cleared, then filled
+in with the Internet address of the foreign host and the port
+number at which the login process resides on the foreign host:
+.DS
+bzero((char *)&server, sizeof (server));
+bcopy(hp->h_addr, (char *) &server.sin_addr, hp->h_length);
+server.sin_family = hp->h_addrtype;
+server.sin_port = sp->s_port;
+.DE
+A socket is created, and a connection initiated. Note
+that \fIconnect\fP implicitly performs a \fIbind\fP
+call, since \fIs\fP is unbound.
+.DS
+s = socket(hp->h_addrtype, SOCK_STREAM, 0);
+if (s < 0) {
+ perror("rlogin: socket");
+ exit(3);
+}
+ ...
+if (connect(s, (struct sockaddr *) &server, sizeof (server)) < 0) {
+ perror("rlogin: connect");
+ exit(4);
+}
+.DE
+The details of the remote login protocol will not be considered here.
+.NH 2
+Connectionless servers
+.PP
+While connection-based services are the norm, some services
+are based on the use of datagram sockets. One, in particular,
+is the \*(lqrwho\*(rq service which provides users with status
+information for hosts connected to a local area
+network. This service, while predicated on the ability to
+\fIbroadcast\fP information to all hosts connected to a particular
+network, is of interest as an example usage of datagram sockets.
+.PP
+A user on any machine running the rwho server may find out
+the current status of a machine with the \fIruptime\fP(1) program.
+The output generated is illustrated in Figure 3.
+.KF
+.DS B
+.TS
+l r l l l l l.
+arpa up 9:45, 5 users, load 1.15, 1.39, 1.31
+cad up 2+12:04, 8 users, load 4.67, 5.13, 4.59
+calder up 10:10, 0 users, load 0.27, 0.15, 0.14
+dali up 2+06:28, 9 users, load 1.04, 1.20, 1.65
+degas up 25+09:48, 0 users, load 1.49, 1.43, 1.41
+ear up 5+00:05, 0 users, load 1.51, 1.54, 1.56
+ernie down 0:24
+esvax down 17:04
+ingres down 0:26
+kim up 3+09:16, 8 users, load 2.03, 2.46, 3.11
+matisse up 3+06:18, 0 users, load 0.03, 0.03, 0.05
+medea up 3+09:39, 2 users, load 0.35, 0.37, 0.50
+merlin down 19+15:37
+miro up 1+07:20, 7 users, load 4.59, 3.28, 2.12
+monet up 1+00:43, 2 users, load 0.22, 0.09, 0.07
+oz down 16:09
+statvax up 2+15:57, 3 users, load 1.52, 1.81, 1.86
+ucbvax up 9:34, 2 users, load 6.08, 5.16, 3.28
+.TE
+.DE
+.ce
+Figure 3. ruptime output.
+.sp
+.KE
+.PP
+Status information for each host is periodically broadcast
+by rwho server processes on each machine. The same server
+process also receives the status information and uses it
+to update a database. This database is then interpreted
+to generate the status information for each host. Servers
+operate autonomously, coupled only by the local network and
+its broadcast capabilities.
+.PP
+Note that the use of broadcast for such a task is fairly inefficient,
+as all hosts must process each message, whether or not using an rwho server.
+Unless such a service is sufficiently universal and is frequently used,
+the expense of periodic broadcasts outweighs the simplicity.
+.PP
+Multicasting is an alternative to broadcasting.
+Setting up multicast sockets is described in Section 5.10.
+.PP
+The rwho server, in a simplified form, is pictured in Figure
+4. There are two separate tasks performed by the server. The
+first task is to act as a receiver of status information broadcast
+by other hosts on the network. This job is carried out in the
+main loop of the program. Packets received at the rwho port
+are interrogated to insure they've been sent by another rwho
+server process, then are time stamped with their arrival time
+and used to update a file indicating the status of the host.
+When a host has not been heard from for an extended period of
+time, the database interpretation routines assume the host is
+down and indicate such on the status reports. This algorithm
+is prone to error as a server may be down while a host is actually
+up, but serves our current needs.
+.KF
+.DS
+.if t .ta .5i 1.0i 1.5i 2.0i
+.if n .ta .7i 1.4i 2.1i 2.8i
+main()
+{
+ ...
+ sp = getservbyname("who", "udp");
+ net = getnetbyname("localnet");
+ sin.sin_addr = inet_makeaddr(net->n_net, INADDR_ANY);
+ sin.sin_port = sp->s_port;
+ ...
+ s = socket(AF_INET, SOCK_DGRAM, 0);
+ ...
+ on = 1;
+ if (setsockopt(s, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)) < 0) {
+ syslog(LOG_ERR, "setsockopt SO_BROADCAST: %m");
+ exit(1);
+ }
+ bind(s, (struct sockaddr *) &sin, sizeof (sin));
+ ...
+ signal(SIGALRM, onalrm);
+ onalrm();
+ for (;;) {
+ struct whod wd;
+ int cc, whod, len = sizeof (from);
+
+ cc = recvfrom(s, (char *)&wd, sizeof (struct whod), 0,
+ (struct sockaddr *)&from, &len);
+ if (cc <= 0) {
+ if (cc < 0 && errno != EINTR)
+ syslog(LOG_ERR, "rwhod: recv: %m");
+ continue;
+ }
+ if (from.sin_port != sp->s_port) {
+ syslog(LOG_ERR, "rwhod: %d: bad from port",
+ ntohs(from.sin_port));
+ continue;
+ }
+ ...
+ if (!verify(wd.wd_hostname)) {
+ syslog(LOG_ERR, "rwhod: malformed host name from %x",
+ ntohl(from.sin_addr.s_addr));
+ continue;
+ }
+ (void) sprintf(path, "%s/whod.%s", RWHODIR, wd.wd_hostname);
+ whod = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+ ...
+ (void) time(&wd.wd_recvtime);
+ (void) write(whod, (char *)&wd, cc);
+ (void) close(whod);
+ }
+}
+.DE
+.ce
+Figure 4. rwho server.
+.sp
+.KE
+.PP
+The second task performed by the server is to supply information
+regarding the status of its host. This involves periodically
+acquiring system status information, packaging it up in a message
+and broadcasting it on the local network for other rwho servers
+to hear. The supply function is triggered by a timer and
+runs off a signal. Locating the system status
+information is somewhat involved, but uninteresting. Deciding
+where to transmit the resultant packet
+is somewhat problematical, however.
+.PP
+Status information must be broadcast on the local network.
+For networks which do not support the notion of broadcast another
+scheme must be used to simulate or
+replace broadcasting. One possibility is to enumerate the
+known neighbors (based on the status messages received
+from other rwho servers). This, unfortunately,
+requires some bootstrapping information,
+for a server will have no idea what machines are its
+neighbors until it receives status messages from them.
+Therefore, if all machines on a net are freshly booted,
+no machine will have any
+known neighbors and thus never receive, or send, any status information.
+This is the identical problem faced by the routing table management
+process in propagating routing status information. The standard
+solution, unsatisfactory as it may be, is to inform one or more servers
+of known neighbors and request that they always communicate with
+these neighbors. If each server has at least one neighbor supplied
+to it, status information may then propagate through
+a neighbor to hosts which
+are not (possibly) directly neighbors. If the server is able to
+support networks which provide a broadcast capability, as well as
+those which do not, then networks with an
+arbitrary topology may share status information*.
+.FS
+* One must, however, be concerned about \*(lqloops\*(rq.
+That is, if a host is connected to multiple networks, it
+will receive status information from itself. This can lead
+to an endless, wasteful, exchange of information.
+.FE
+.PP
+It is important that software operating in a distributed
+environment not have any site-dependent information compiled into it.
+This would require a separate copy of the server at each host and
+make maintenance a severe headache. 4.4BSD attempts to isolate
+host-specific information from applications by providing system
+calls which return the necessary information*.
+.FS
+* An example of such a system call is the \fIgethostname\fP(2)
+call which returns the host's \*(lqofficial\*(rq name.
+.FE
+A mechanism exists, in the form of an \fIioctl\fP call,
+for finding the collection
+of networks to which a host is directly connected.
+Further, a local network broadcasting mechanism
+has been implemented at the socket level.
+Combining these two features allows a process
+to broadcast on any directly connected local
+network which supports the notion of broadcasting
+in a site independent manner. This allows 4.4BSD
+to solve the problem of deciding how to propagate
+status information in the case of \fIrwho\fP, or
+more generally in broadcasting:
+Such status information is broadcast to connected
+networks at the socket level, where the connected networks
+have been obtained via the appropriate \fIioctl\fP
+calls.
+The specifics of
+such broadcastings are complex, however, and will
+be covered in section 5.
diff --git a/share/doc/psd/21.ipc/5.t b/share/doc/psd/21.ipc/5.t
new file mode 100644
index 0000000..fe6977e
--- /dev/null
+++ b/share/doc/psd/21.ipc/5.t
@@ -0,0 +1,1667 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)5.t 8.1 (Berkeley) 8/14/93
+.\"
+.\".ds RH "Advanced Topics
+.bp
+.nr H1 5
+.nr H2 0
+.LG
+.B
+.ce
+5. ADVANCED TOPICS
+.sp 2
+.R
+.NL
+.PP
+A number of facilities have yet to be discussed. For most users
+of the IPC the mechanisms already
+described will suffice in constructing distributed
+applications. However, others will find the need to utilize some
+of the features which we consider in this section.
+.NH 2
+Out of band data
+.PP
+The stream socket abstraction includes the notion of \*(lqout
+of band\*(rq data. Out of band data is a logically independent
+transmission channel associated with each pair of connected
+stream sockets. Out of band data is delivered to the user
+independently of normal data.
+The abstraction defines that the out of band data facilities
+must support the reliable delivery of at least one
+out of band message at a time. This message may contain at least one
+byte of data, and at least one message may be pending delivery
+to the user at any one time. For communications protocols which
+support only in-band signaling (i.e. the urgent data is
+delivered in sequence with the normal data), the system normally extracts
+the data from the normal data stream and stores it separately.
+This allows users to choose between receiving the urgent data
+in order and receiving it out of sequence without having to
+buffer all the intervening data. It is possible
+to ``peek'' (via MSG_PEEK) at out of band data.
+If the socket has a process group, a SIGURG signal is generated
+when the protocol is notified of the existence of out of band data.
+A process can set the process group
+or process id to be informed by the SIGURG signal via the
+appropriate \fIfcntl\fP call, as described below for
+SIGIO.
+If multiple sockets may have out of band data awaiting
+delivery, a \fIselect\fP call for exceptional conditions
+may be used to determine those sockets with such data pending.
+Neither the signal nor the select indicates the actual arrival
+of the out-of-band data, but only notification that it is pending.
+.PP
+In addition to the information passed, a logical mark is placed in
+the data stream to indicate the point at which the out
+of band data was sent. The remote login and remote shell
+applications use this facility to propagate signals between
+client and server processes. When a signal
+flushes any pending output from the remote process(es), all
+data up to the mark in the data stream is discarded.
+.PP
+To send an out of band message the MSG_OOB flag is supplied to
+a \fIsend\fP or \fIsendto\fP call,
+while to receive out of band data MSG_OOB should be indicated
+when performing a \fIrecvfrom\fP or \fIrecv\fP call.
+To find out if the read pointer is currently pointing at
+the mark in the data stream, the SIOCATMARK ioctl is provided:
+.DS
+ioctl(s, SIOCATMARK, &yes);
+.DE
+If \fIyes\fP is a 1 on return, the next read will return data
+after the mark. Otherwise (assuming out of band data has arrived),
+the next read will provide data sent by the client prior
+to transmission of the out of band signal. The routine used
+in the remote login process to flush output on receipt of an
+interrupt or quit signal is shown in Figure 5.
+It reads the normal data up to the mark (to discard it),
+then reads the out-of-band byte.
+.KF
+.DS
+#include <sys/ioctl.h>
+#include <sys/file.h>
+ ...
+oob()
+{
+ int out = FWRITE, mark;
+ char waste[BUFSIZ];
+
+ /* flush local terminal output */
+ ioctl(1, TIOCFLUSH, (char *)&out);
+ for (;;) {
+ if (ioctl(rem, SIOCATMARK, &mark) < 0) {
+ perror("ioctl");
+ break;
+ }
+ if (mark)
+ break;
+ (void) read(rem, waste, sizeof (waste));
+ }
+ if (recv(rem, &mark, 1, MSG_OOB) < 0) {
+ perror("recv");
+ ...
+ }
+ ...
+}
+.DE
+.ce
+Figure 5. Flushing terminal I/O on receipt of out of band data.
+.sp
+.KE
+.PP
+A process may also read or peek at the out-of-band data
+without first reading up to the mark.
+This is more difficult when the underlying protocol delivers
+the urgent data in-band with the normal data, and only sends
+notification of its presence ahead of time (e.g., the TCP protocol
+used to implement streams in the Internet domain).
+With such protocols, the out-of-band byte may not yet have arrived
+when a \fIrecv\fP is done with the MSG_OOB flag.
+In that case, the call will return an error of EWOULDBLOCK.
+Worse, there may be enough in-band data in the input buffer
+that normal flow control prevents the peer from sending the urgent data
+until the buffer is cleared.
+The process must then read enough of the queued data
+that the urgent data may be delivered.
+.PP
+Certain programs that use multiple bytes of urgent data and must
+handle multiple urgent signals (e.g., \fItelnet\fP\|(1C))
+need to retain the position of urgent data within the stream.
+This treatment is available as a socket-level option, SO_OOBINLINE;
+see \fIsetsockopt\fP\|(2) for usage.
+With this option, the position of urgent data (the \*(lqmark\*(rq)
+is retained, but the urgent data immediately follows the mark
+within the normal data stream returned without the MSG_OOB flag.
+Reception of multiple urgent indications causes the mark to move,
+but no out-of-band data are lost.
+.NH 2
+Non-Blocking Sockets
+.PP
+It is occasionally convenient to make use of sockets
+which do not block; that is, I/O requests which
+cannot complete immediately and
+would therefore cause the process to be suspended awaiting completion are
+not executed, and an error code is returned.
+Once a socket has been created via
+the \fIsocket\fP call, it may be marked as non-blocking
+by \fIfcntl\fP as follows:
+.DS
+#include <fcntl.h>
+ ...
+int s;
+ ...
+s = socket(AF_INET, SOCK_STREAM, 0);
+ ...
+if (fcntl(s, F_SETFL, FNDELAY) < 0) {
+ perror("fcntl F_SETFL, FNDELAY");
+ exit(1);
+}
+ ...
+.DE
+.PP
+When performing non-blocking I/O on sockets, one must be
+careful to check for the error EWOULDBLOCK (stored in the
+global variable \fIerrno\fP), which occurs when
+an operation would normally block, but the socket it
+was performed on is marked as non-blocking.
+In particular, \fIaccept\fP, \fIconnect\fP, \fIsend\fP, \fIrecv\fP,
+\fIread\fP, and \fIwrite\fP can
+all return EWOULDBLOCK, and processes should be prepared
+to deal with such return codes.
+If an operation such as a \fIsend\fP cannot be done in its entirety,
+but partial writes are sensible (for example, when using a stream socket),
+the data that can be sent immediately will be processed,
+and the return value will indicate the amount actually sent.
+.NH 2
+Interrupt driven socket I/O
+.PP
+The SIGIO signal allows a process to be notified
+via a signal when a socket (or more generally, a file
+descriptor) has data waiting to be read. Use of
+the SIGIO facility requires three steps: First,
+the process must set up a SIGIO signal handler
+by use of the \fIsignal\fP or \fIsigvec\fP calls. Second,
+it must set the process id or process group id which is to receive
+notification of pending input to its own process id,
+or the process group id of its process group (note that
+the default process group of a socket is group zero).
+This is accomplished by use of an \fIfcntl\fP call.
+Third, it must enable asynchronous notification of pending I/O requests
+with another \fIfcntl\fP call. Sample code to
+allow a given process to receive information on
+pending I/O requests as they occur for a socket \fIs\fP
+is given in Figure 6. With the addition of a handler for SIGURG,
+this code can also be used to prepare for receipt of SIGURG signals.
+.KF
+.DS
+#include <fcntl.h>
+ ...
+int io_handler();
+ ...
+signal(SIGIO, io_handler);
+
+/* Set the process receiving SIGIO/SIGURG signals to us */
+
+if (fcntl(s, F_SETOWN, getpid()) < 0) {
+ perror("fcntl F_SETOWN");
+ exit(1);
+}
+
+/* Allow receipt of asynchronous I/O signals */
+
+if (fcntl(s, F_SETFL, FASYNC) < 0) {
+ perror("fcntl F_SETFL, FASYNC");
+ exit(1);
+}
+.DE
+.ce
+Figure 6. Use of asynchronous notification of I/O requests.
+.sp
+.KE
+.NH 2
+Signals and process groups
+.PP
+Due to the existence of the SIGURG and SIGIO signals each socket has an
+associated process number, just as is done for terminals.
+This value is initialized to zero,
+but may be redefined at a later time with the F_SETOWN
+\fIfcntl\fP, such as was done in the code above for SIGIO.
+To set the socket's process id for signals, positive arguments
+should be given to the \fIfcntl\fP call. To set the socket's
+process group for signals, negative arguments should be
+passed to \fIfcntl\fP. Note that the process number indicates
+either the associated process id or the associated process
+group; it is impossible to specify both at the same time.
+A similar \fIfcntl\fP, F_GETOWN, is available for determining the
+current process number of a socket.
+.PP
+Another signal which is useful when constructing server processes
+is SIGCHLD. This signal is delivered to a process when any
+child processes have changed state. Normally servers use
+the signal to \*(lqreap\*(rq child processes that have exited
+without explicitly awaiting their termination
+or periodic polling for exit status.
+For example, the remote login server loop shown in Figure 2
+may be augmented as shown in Figure 7.
+.KF
+.DS
+int reaper();
+ ...
+signal(SIGCHLD, reaper);
+listen(f, 5);
+for (;;) {
+ int g, len = sizeof (from);
+
+ g = accept(f, (struct sockaddr *)&from, &len);
+ if (g < 0) {
+ if (errno != EINTR)
+ syslog(LOG_ERR, "rlogind: accept: %m");
+ continue;
+ }
+ ...
+}
+ ...
+#include <wait.h>
+reaper()
+{
+ union wait status;
+
+ while (wait3(&status, WNOHANG, 0) > 0)
+ ;
+}
+.DE
+.sp
+.ce
+Figure 7. Use of the SIGCHLD signal.
+.sp
+.KE
+.PP
+If the parent server process fails to reap its children,
+a large number of \*(lqzombie\*(rq processes may be created.
+.NH 2
+Pseudo terminals
+.PP
+Many programs will not function properly without a terminal
+for standard input and output. Since sockets do not provide
+the semantics of terminals,
+it is often necessary to have a process communicating over
+the network do so through a \fIpseudo-terminal\fP. A pseudo-
+terminal is actually a pair of devices, master and slave,
+which allow a process to serve as an active agent in communication
+between processes and users. Data written on the slave side
+of a pseudo-terminal is supplied as input to a process reading
+from the master side, while data written on the master side are
+processed as terminal input for the slave.
+In this way, the process manipulating
+the master side of the pseudo-terminal has control over the
+information read and written on the slave side
+as if it were manipulating the keyboard and reading the screen
+on a real terminal.
+The purpose of this abstraction is to
+preserve terminal semantics over a network connection\(em
+that is, the slave side appears as a normal terminal to
+any process reading from or writing to it.
+.PP
+For example, the remote
+login server uses pseudo-terminals for remote login sessions.
+A user logging in to a machine across the network is provided
+a shell with a slave pseudo-terminal as standard input, output,
+and error. The server process then handles the communication
+between the programs invoked by the remote shell and the user's
+local client process.
+When a user sends a character that generates an interrupt
+on the remote machine that flushes terminal output,
+the pseudo-terminal generates a control message for the server process.
+The server then sends an out of band message
+to the client process to signal a flush of data at the real terminal
+and on the intervening data buffered in the network.
+.PP
+Under 4.4BSD, the name of the slave side of a pseudo-terminal is of the form
+\fI/dev/ttyxy\fP, where \fIx\fP is a single letter
+starting at `p' and continuing to `t'.
+\fIy\fP is a hexadecimal digit (i.e., a single
+character in the range 0 through 9 or `a' through `f').
+The master side of a pseudo-terminal is \fI/dev/ptyxy\fP,
+where \fIx\fP and \fIy\fP correspond to the
+slave side of the pseudo-terminal.
+.PP
+In general, the method of obtaining a pair of master and
+slave pseudo-terminals is to
+find a pseudo-terminal which
+is not currently in use.
+The master half of a pseudo-terminal is a single-open device;
+thus, each master may be opened in turn until an open succeeds.
+The slave side of the pseudo-terminal is then opened,
+and is set to the proper terminal modes if necessary.
+The process then \fIfork\fPs; the child closes
+the master side of the pseudo-terminal, and \fIexec\fPs the
+appropriate program. Meanwhile, the parent closes the
+slave side of the pseudo-terminal and begins reading and
+writing from the master side. Sample code making use of
+pseudo-terminals is given in Figure 8; this code assumes
+that a connection on a socket \fIs\fP exists, connected
+to a peer who wants a service of some kind, and that the
+process has disassociated itself from any previous controlling terminal.
+.KF
+.DS
+gotpty = 0;
+for (c = 'p'; !gotpty && c <= 's'; c++) {
+ line = "/dev/ptyXX";
+ line[sizeof("/dev/pty")-1] = c;
+ line[sizeof("/dev/ptyp")-1] = '0';
+ if (stat(line, &statbuf) < 0)
+ break;
+ for (i = 0; i < 16; i++) {
+ line[sizeof("/dev/ptyp")-1] = "0123456789abcdef"[i];
+ master = open(line, O_RDWR);
+ if (master > 0) {
+ gotpty = 1;
+ break;
+ }
+ }
+}
+if (!gotpty) {
+ syslog(LOG_ERR, "All network ports in use");
+ exit(1);
+}
+
+line[sizeof("/dev/")-1] = 't';
+slave = open(line, O_RDWR); /* \fIslave\fP is now slave side */
+if (slave < 0) {
+ syslog(LOG_ERR, "Cannot open slave pty %s", line);
+ exit(1);
+}
+
+ioctl(slave, TIOCGETP, &b); /* Set slave tty modes */
+b.sg_flags = CRMOD|XTABS|ANYP;
+ioctl(slave, TIOCSETP, &b);
+
+i = fork();
+if (i < 0) {
+ syslog(LOG_ERR, "fork: %m");
+ exit(1);
+} else if (i) { /* Parent */
+ close(slave);
+ ...
+} else { /* Child */
+ (void) close(s);
+ (void) close(master);
+ dup2(slave, 0);
+ dup2(slave, 1);
+ dup2(slave, 2);
+ if (slave > 2)
+ (void) close(slave);
+ ...
+}
+.DE
+.ce
+Figure 8. Creation and use of a pseudo terminal
+.sp
+.KE
+.NH 2
+Selecting specific protocols
+.PP
+If the third argument to the \fIsocket\fP call is 0,
+\fIsocket\fP will select a default protocol to use with
+the returned socket of the type requested.
+The default protocol is usually correct, and alternate choices are not
+usually available.
+However, when using ``raw'' sockets to communicate directly with
+lower-level protocols or hardware interfaces,
+the protocol argument may be important for setting up demultiplexing.
+For example, raw sockets in the Internet family may be used to implement
+a new protocol above IP, and the socket will receive packets
+only for the protocol specified.
+To obtain a particular protocol one determines the protocol number
+as defined within the communication domain. For the Internet
+domain one may use one of the library routines
+discussed in section 3, such as \fIgetprotobyname\fP:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+ ...
+pp = getprotobyname("newtcp");
+s = socket(AF_INET, SOCK_STREAM, pp->p_proto);
+.DE
+This would result in a socket \fIs\fP using a stream
+based connection, but with protocol type of ``newtcp''
+instead of the default ``tcp.''
+.PP
+In the NS domain, the available socket protocols are defined in
+<\fInetns/ns.h\fP>. To create a raw socket for Xerox Error Protocol
+messages, one might use:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+ ...
+s = socket(AF_NS, SOCK_RAW, NSPROTO_ERROR);
+.DE
+.NH 2
+Address binding
+.PP
+As was mentioned in section 2,
+binding addresses to sockets in the Internet and NS domains can be
+fairly complex. As a brief reminder, these associations
+are composed of local and foreign
+addresses, and local and foreign ports. Port numbers are
+allocated out of separate spaces, one for each system and one
+for each domain on that system.
+Through the \fIbind\fP system call, a
+process may specify half of an association, the
+<local address, local port> part, while the
+\fIconnect\fP
+and \fIaccept\fP
+primitives are used to complete a socket's association by
+specifying the <foreign address, foreign port> part.
+Since the association is created in two steps the association
+uniqueness requirement indicated previously could be violated unless
+care is taken. Further, it is unrealistic to expect user
+programs to always know proper values to use for the local address
+and local port since a host may reside on multiple networks and
+the set of allocated port numbers is not directly accessible
+to a user.
+.PP
+To simplify local address binding in the Internet domain the notion of a
+\*(lqwildcard\*(rq address has been provided. When an address
+is specified as INADDR_ANY (a manifest constant defined in
+<netinet/in.h>), the system interprets the address as
+\*(lqany valid address\*(rq. For example, to bind a specific
+port number to a socket, but leave the local address unspecified,
+the following code might be used:
+.DS
+#include <sys/types.h>
+#include <netinet/in.h>
+ ...
+struct sockaddr_in sin;
+ ...
+s = socket(AF_INET, SOCK_STREAM, 0);
+sin.sin_family = AF_INET;
+sin.sin_addr.s_addr = htonl(INADDR_ANY);
+sin.sin_port = htons(MYPORT);
+bind(s, (struct sockaddr *) &sin, sizeof (sin));
+.DE
+Sockets with wildcarded local addresses may receive messages
+directed to the specified port number, and sent to any
+of the possible addresses assigned to a host. For example,
+if a host has addresses 128.32.0.4 and 10.0.0.78, and a socket is bound as
+above, the process will be
+able to accept connection requests which are addressed to
+128.32.0.4 or 10.0.0.78.
+If a server process wished to only allow hosts on a
+given network to connect to it, it would bind
+the address of the host on the appropriate network.
+.PP
+In a similar fashion, a local port may be left unspecified
+(specified as zero), in which case the system will select an
+appropriate port number for it. This shortcut will work
+both in the Internet and NS domains. For example, to
+bind a specific local address to a socket, but to leave the
+local port number unspecified:
+.DS
+hp = gethostbyname(hostname);
+if (hp == NULL) {
+ ...
+}
+bcopy(hp->h_addr, (char *) &sin.sin_addr, hp->h_length);
+sin.sin_port = htons(0);
+bind(s, (struct sockaddr *) &sin, sizeof (sin));
+.DE
+The system selects the local port number based on two criteria.
+The first is that on 4BSD systems,
+Internet ports below IPPORT_RESERVED (1024) (for the Xerox domain,
+0 through 3000) are reserved
+for privileged users (i.e., the super user);
+Internet ports above IPPORT_USERRESERVED (50000) are reserved
+for non-privileged servers. The second is
+that the port number is not currently bound to some other
+socket. In order to find a free Internet port number in the privileged
+range the \fIrresvport\fP library routine may be used as follows
+to return a stream socket with a privileged port number:
+.DS
+int lport = IPPORT_RESERVED \- 1;
+int s;
+\&...
+s = rresvport(&lport);
+if (s < 0) {
+ if (errno == EAGAIN)
+ fprintf(stderr, "socket: all ports in use\en");
+ else
+ perror("rresvport: socket");
+ ...
+}
+.DE
+The restriction on allocating ports was done to allow processes
+executing in a \*(lqsecure\*(rq environment to perform authentication
+based on the originating address and port number. For example,
+the \fIrlogin\fP(1) command allows users to log in across a network
+without being asked for a password, if two conditions hold:
+First, the name of the system the user
+is logging in from is in the file
+\fI/etc/hosts.equiv\fP on the system he is logging
+in to (or the system name and the user name are in
+the user's \fI.rhosts\fP file in the user's home
+directory), and second, that the user's rlogin
+process is coming from a privileged port on the machine from which he is
+logging in. The port number and network address of the
+machine from which the user is logging in can be determined either
+by the \fIfrom\fP result of the \fIaccept\fP call, or
+from the \fIgetpeername\fP call.
+.PP
+In certain cases the algorithm used by the system in selecting
+port numbers is unsuitable for an application. This is because
+associations are created in a two step process. For example,
+the Internet file transfer protocol, FTP, specifies that data
+connections must always originate from the same local port. However,
+duplicate associations are avoided by connecting to different foreign
+ports. In this situation the system would disallow binding the
+same local address and port number to a socket if a previous data
+connection's socket still existed. To override the default port
+selection algorithm, an option call must be performed prior
+to address binding:
+.DS
+ ...
+int on = 1;
+ ...
+setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+bind(s, (struct sockaddr *) &sin, sizeof (sin));
+.DE
+With the above call, local addresses may be bound which
+are already in use. This does not violate the uniqueness
+requirement as the system still checks at connect time to
+be sure any other sockets with the same local address and
+port do not have the same foreign address and port.
+If the association already exists, the error EADDRINUSE is returned.
+A related socket option, SO_REUSEPORT, which allows completely
+duplicate bindings, is described in the IP multicasting section.
+.NH 2
+Socket Options
+.PP
+It is possible to set and get a number of options on sockets
+via the \fIsetsockopt\fP and \fIgetsockopt\fP system calls.
+These options include such things as marking a socket for
+broadcasting, not to route, to linger on close, etc.
+In addition, there are protocol-specific options for IP and TCP,
+as described in
+.IR ip (4),
+.IR tcp (4),
+and in the section on multicasting below.
+.PP
+The general forms of the calls are:
+.DS
+setsockopt(s, level, optname, optval, optlen);
+.DE
+and
+.DS
+getsockopt(s, level, optname, optval, optlen);
+.DE
+.PP
+The parameters to the calls are as follows: \fIs\fP
+is the socket on which the option is to be applied.
+\fILevel\fP specifies the protocol layer on which the
+option is to be applied; in most cases this is
+the ``socket level'', indicated by the symbolic constant
+SOL_SOCKET, defined in \fI<sys/socket.h>.\fP
+The actual option is specified in \fIoptname\fP, and is
+a symbolic constant also defined in \fI<sys/socket.h>\fP.
+\fIOptval\fP and \fIOptlen\fP point to the value of the
+option (in most cases, whether the option is to be turned
+on or off), and the length of the value of the option,
+respectively.
+For \fIgetsockopt\fP, \fIoptlen\fP is
+a value-result parameter, initially set to the size of
+the storage area pointed to by \fIoptval\fP, and modified
+upon return to indicate the actual amount of storage used.
+.PP
+An example should help clarify things. It is sometimes
+useful to determine the type (e.g., stream, datagram, etc.)
+of an existing socket; programs
+under \fIinetd\fP (described below) may need to perform this
+task. This can be accomplished as follows via the
+SO_TYPE socket option and the \fIgetsockopt\fP call:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int type, size;
+
+size = sizeof (int);
+
+if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &size) < 0) {
+ ...
+}
+.DE
+After the \fIgetsockopt\fP call, \fItype\fP will be set
+to the value of the socket type, as defined in
+\fI<sys/socket.h>\fP. If, for example, the socket were
+a datagram socket, \fItype\fP would have the value
+corresponding to SOCK_DGRAM.
+.NH 2
+Broadcasting and determining network configuration
+.PP
+By using a datagram socket, it is possible to send broadcast
+packets on many networks supported by the system.
+The network itself must support broadcast; the system
+provides no simulation of broadcast in software.
+Broadcast messages can place a high load on a network since they force
+every host on the network to service them. Consequently,
+the ability to send broadcast packets has been limited
+to sockets which are explicitly marked as allowing broadcasting.
+Broadcast is typically used for one of two reasons:
+it is desired to find a resource on a local network without prior
+knowledge of its address,
+or important functions such as routing require that information
+be sent to all accessible neighbors.
+.PP
+Multicasting is an alternative to broadcasting.
+Setting up IP multicast sockets is described in the next section.
+.PP
+To send a broadcast message, a datagram socket
+should be created:
+.DS
+s = socket(AF_INET, SOCK_DGRAM, 0);
+.DE
+or
+.DS
+s = socket(AF_NS, SOCK_DGRAM, 0);
+.DE
+The socket is marked as allowing broadcasting,
+.DS
+int on = 1;
+
+setsockopt(s, SOL_SOCKET, SO_BROADCAST, &on, sizeof (on));
+.DE
+and at least a port number should be bound to the socket:
+.DS
+sin.sin_family = AF_INET;
+sin.sin_addr.s_addr = htonl(INADDR_ANY);
+sin.sin_port = htons(MYPORT);
+bind(s, (struct sockaddr *) &sin, sizeof (sin));
+.DE
+or, for the NS domain,
+.DS
+sns.sns_family = AF_NS;
+netnum = htonl(net);
+sns.sns_addr.x_net = *(union ns_net *) &netnum; /* insert net number */
+sns.sns_addr.x_port = htons(MYPORT);
+bind(s, (struct sockaddr *) &sns, sizeof (sns));
+.DE
+The destination address of the message to be broadcast
+depends on the network(s) on which the message is to be broadcast.
+The Internet domain supports a shorthand notation for broadcast
+on the local network, the address INADDR_BROADCAST (defined in
+<\fInetinet/in.h\fP>).
+To determine the list of addresses for all reachable neighbors
+requires knowledge of the networks to which the host is connected.
+Since this information should
+be obtained in a host-independent fashion and may be impossible
+to derive, 4.4BSD provides a method of
+retrieving this information from the system data structures.
+The SIOCGIFCONF \fIioctl\fP call returns the interface
+configuration of a host in the form of a
+single \fIifconf\fP structure; this structure contains
+a ``data area'' which is made up of an array of
+\fIifreq\fP structures, one for each network interface
+to which the host is connected.
+These structures are defined in
+\fI<net/if.h>\fP as follows:
+.DS
+.if t .ta .5i 1.0i 1.5i 3.5i
+.if n .ta .7i 1.4i 2.1i 3.4i
+struct ifconf {
+ int ifc_len; /* size of associated buffer */
+ union {
+ caddr_t ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+};
+
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */
+
+#define IFNAMSIZ 16
+
+struct ifreq {
+ char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ short ifru_flags;
+ caddr_t ifru_data;
+ } ifr_ifru;
+};
+
+.if t .ta \w' #define'u +\w' ifr_broadaddr'u +\w' ifr_ifru.ifru_broadaddr'u
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
+#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+#define ifr_data ifr_ifru.ifru_data /* for use by interface */
+.DE
+The actual call which obtains the
+interface configuration is
+.DS
+struct ifconf ifc;
+char buf[BUFSIZ];
+
+ifc.ifc_len = sizeof (buf);
+ifc.ifc_buf = buf;
+if (ioctl(s, SIOCGIFCONF, (char *) &ifc) < 0) {
+ ...
+}
+.DE
+After this call \fIbuf\fP will contain one \fIifreq\fP structure for
+each network to which the host is connected, and
+\fIifc.ifc_len\fP will have been modified to reflect the number
+of bytes used by the \fIifreq\fP structures.
+.PP
+For each structure
+there exists a set of ``interface flags'' which tell
+whether the network corresponding to that interface is
+up or down, point to point or broadcast, etc. The
+SIOCGIFFLAGS \fIioctl\fP retrieves these
+flags for an interface specified by an \fIifreq\fP
+structure as follows:
+.DS
+struct ifreq *ifr;
+
+ifr = ifc.ifc_req;
+
+for (n = ifc.ifc_len / sizeof (struct ifreq); --n >= 0; ifr++) {
+ /*
+ * We must be careful that we don't use an interface
+ * devoted to an address family other than those intended;
+ * if we were interested in NS interfaces, the
+ * AF_INET would be AF_NS.
+ */
+ if (ifr->ifr_addr.sa_family != AF_INET)
+ continue;
+ if (ioctl(s, SIOCGIFFLAGS, (char *) ifr) < 0) {
+ ...
+ }
+ /*
+ * Skip boring cases.
+ */
+ if ((ifr->ifr_flags & IFF_UP) == 0 ||
+ (ifr->ifr_flags & IFF_LOOPBACK) ||
+ (ifr->ifr_flags & (IFF_BROADCAST | IFF_POINTTOPOINT)) == 0)
+ continue;
+.DE
+.PP
+Once the flags have been obtained, the broadcast address
+must be obtained. In the case of broadcast networks this is
+done via the SIOCGIFBRDADDR \fIioctl\fP, while for point-to-point networks
+the address of the destination host is obtained with SIOCGIFDSTADDR.
+.DS
+struct sockaddr dst;
+
+if (ifr->ifr_flags & IFF_POINTTOPOINT) {
+ if (ioctl(s, SIOCGIFDSTADDR, (char *) ifr) < 0) {
+ ...
+ }
+ bcopy((char *) &ifr->ifr_dstaddr, (char *) &dst, sizeof (ifr->ifr_dstaddr));
+} else if (ifr->ifr_flags & IFF_BROADCAST) {
+ if (ioctl(s, SIOCGIFBRDADDR, (char *) ifr) < 0) {
+ ...
+ }
+ bcopy((char *) &ifr->ifr_broadaddr, (char *) &dst, sizeof (ifr->ifr_broadaddr));
+}
+.DE
+.PP
+After the appropriate \fIioctl\fP's have obtained the broadcast
+or destination address (now in \fIdst\fP), the \fIsendto\fP call may be
+used:
+.DS
+ sendto(s, buf, buflen, 0, (struct sockaddr *)&dst, sizeof (dst));
+}
+.DE
+In the above loop one \fIsendto\fP occurs for every
+interface to which the host is connected that supports the notion of
+broadcast or point-to-point addressing.
+If a process only wished to send broadcast
+messages on a given network, code similar to that outlined above
+would be used, but the loop would need to find the
+correct destination address.
+.PP
+Received broadcast messages contain the sender's address
+and port, as datagram sockets are bound before
+a message is allowed to go out.
+.NH 2
+IP Multicasting
+.PP
+IP multicasting is the transmission of an IP datagram to a "host
+group", a set of zero or more hosts identified by a single IP
+destination address. A multicast datagram is delivered to all
+members of its destination host group with the same "best-efforts"
+reliability as regular unicast IP datagrams, i.e., the datagram is
+not guaranteed to arrive intact at all members of the destination
+group or in the same order relative to other datagrams.
+.PP
+The membership of a host group is dynamic; that is, hosts may join
+and leave groups at any time. There is no restriction on the
+location or number of members in a host group. A host may be a
+member of more than one group at a time. A host need not be a member
+of a group to send datagrams to it.
+.PP
+A host group may be permanent or transient. A permanent group has a
+well-known, administratively assigned IP address. It is the address,
+not the membership of the group, that is permanent; at any time a
+permanent group may have any number of members, even zero. Those IP
+multicast addresses that are not reserved for permanent groups are
+available for dynamic assignment to transient groups which exist only
+as long as they have members.
+.PP
+In general, a host cannot assume that datagrams sent to any host
+group address will reach only the intended hosts, or that datagrams
+received as a member of a transient host group are intended for the
+recipient. Misdelivery must be detected at a level above IP, using
+higher-level identifiers or authentication tokens. Information
+transmitted to a host group address should be encrypted or governed
+by administrative routing controls if the sender is concerned about
+unwanted listeners.
+.PP
+IP multicasting is currently supported only on AF_INET sockets of type
+SOCK_DGRAM and SOCK_RAW, and only on subnetworks for which the interface
+driver has been modified to support multicasting.
+.PP
+The next subsections describe how to send and receive multicast datagrams.
+.NH 3
+Sending IP Multicast Datagrams
+.PP
+To send a multicast datagram, specify an IP multicast address in the range
+224.0.0.0 to 239.255.255.255 as the destination address
+in a
+.IR sendto (2)
+call.
+.PP
+The definitions required for the multicast-related socket options are
+found in \fI<netinet/in.h>\fP.
+All IP addresses are passed in network byte-order.
+.PP
+By default, IP multicast datagrams are sent with a time-to-live (TTL) of 1,
+which prevents them from being forwarded beyond a single subnetwork. A new
+socket option allows the TTL for subsequent multicast datagrams to be set to
+any value from 0 to 255, in order to control the scope of the multicasts:
+.DS
+u_char ttl;
+setsockopt(sock, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
+.DE
+Multicast datagrams with a TTL of 0 will not be transmitted on any subnet,
+but may be delivered locally if the sending host belongs to the destination
+group and if multicast loopback has not been disabled on the sending socket
+(see below). Multicast datagrams with TTL greater than one may be delivered
+to more than one subnet if there are one or more multicast routers attached
+to the first-hop subnet. To provide meaningful scope control, the multicast
+routers support the notion of TTL "thresholds", which prevent datagrams with
+less than a certain TTL from traversing certain subnets. The thresholds
+enforce the following convention:
+.TS
+center;
+l | l
+l | n.
+_
+Scope Initial TTL
+=
+restricted to the same host 0
+restricted to the same subnet 1
+restricted to the same site 32
+restricted to the same region 64
+restricted to the same continent 128
+unrestricted 255
+_
+.TE
+"Sites" and "regions" are not strictly defined, and sites may be further
+subdivided into smaller administrative units, as a local matter.
+.PP
+An application may choose an initial TTL other than the ones listed above.
+For example, an application might perform an "expanding-ring search" for a
+network resource by sending a multicast query, first with a TTL of 0, and
+then with larger and larger TTLs, until a reply is received, perhaps using
+the TTL sequence 0, 1, 2, 4, 8, 16, 32.
+.PP
+The multicast router
+.IR mrouted (8)
+refuses to forward any
+multicast datagram with a destination address between 224.0.0.0 and
+224.0.0.255, inclusive, regardless of its TTL. This range of addresses is
+reserved for the use of routing protocols and other low-level topology
+discovery or maintenance protocols, such as gateway discovery and group
+membership reporting.
+.PP
+The address 224.0.0.0 is
+guaranteed not to be assigned to any group, and 224.0.0.1 is assigned
+to the permanent group of all IP hosts (including gateways). This is
+used to address all multicast hosts on the directly connected
+network. There is no multicast address (or any other IP address) for
+all hosts on the total Internet. The addresses of other well-known,
+permanent groups are published in the "Assigned Numbers" RFC,
+which is available from the InterNIC.
+.PP
+Each multicast transmission is sent from a single network interface, even if
+the host has more than one multicast-capable interface. (If the host is
+also serving as a multicast router,
+a multicast may be \fIforwarded\fP to interfaces
+other than the originating interface, provided that the TTL is greater than 1.)
+The default interface to be used for multicasting is the primary network
+interface on the system.
+A socket option
+is available to override the default for subsequent transmissions from a
+given socket:
+.DS
+struct in_addr addr;
+setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, &addr, sizeof(addr));
+.DE
+where "addr" is the local IP address of the desired outgoing interface.
+An address of INADDR_ANY may be used to revert to the default interface.
+The local IP address of an interface can be obtained via the SIOCGIFCONF
+ioctl. To determine if an interface supports multicasting, fetch the
+interface flags via the SIOCGIFFLAGS ioctl and see if the IFF_MULTICAST
+flag is set. (Normal applications should not need to use this option; it
+is intended primarily for multicast routers and other system services
+specifically concerned with internet topology.)
+The SIOCGIFCONF and SIOCGIFFLAGS ioctls are described in the previous section.
+.PP
+If a multicast datagram is sent to a group to which the sending host itself
+belongs (on the outgoing interface), a copy of the datagram is, by default,
+looped back by the IP layer for local delivery. Another socket option gives
+the sender explicit control over whether or not subsequent datagrams are
+looped back:
+.DS
+u_char loop;
+setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop));
+.DE
+where \f2loop\f1 is set to 0 to disable loopback,
+and set to 1 to enable loopback.
+This option
+improves performance for applications that may have no more than one
+instance on a single host (such as a router daemon), by eliminating
+the overhead of receiving their own transmissions. It should generally not
+be used by applications for which there may be more than one instance on a
+single host (such as a conferencing program) or for which the sender does
+not belong to the destination group (such as a time querying program).
+.PP
+A multicast datagram sent with an initial TTL greater than 1 may be delivered
+to the sending host on a different interface from that on which it was sent,
+if the host belongs to the destination group on that other interface. The
+loopback control option has no effect on such delivery.
+.NH 3
+Receiving IP Multicast Datagrams
+.PP
+Before a host can receive IP multicast datagrams, it must become a member
+of one or more IP multicast groups. A process can ask the host to join
+a multicast group by using the following socket option:
+.DS
+struct ip_mreq mreq;
+setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+.DE
+where "mreq" is the following structure:
+.DS
+struct ip_mreq {
+ struct in_addr imr_multiaddr; /* \fImulticast group to join\fP */
+ struct in_addr imr_interface; /* \fIinterface to join on\fP */
+}
+.DE
+Every membership is associated with a single interface, and it is possible
+to join the same group on more than one interface. "imr_interface" should
+be INADDR_ANY to choose the default multicast interface, or one of the
+host's local addresses to choose a particular (multicast-capable) interface.
+Up to IP_MAX_MEMBERSHIPS (currently 20) memberships may be added on a
+single socket.
+.PP
+To drop a membership, use:
+.DS
+struct ip_mreq mreq;
+setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq));
+.DE
+where "mreq" contains the same values as used to add the membership. The
+memberships associated with a socket are also dropped when the socket is
+closed or the process holding the socket is killed. However, more than
+one socket may claim a membership in a particular group, and the host
+will remain a member of that group until the last claim is dropped.
+.PP
+The memberships associated with a socket do not necessarily determine which
+datagrams are received on that socket. Incoming multicast packets are
+accepted by the kernel IP layer if any socket has claimed a membership in the
+destination group of the datagram; however, delivery of a multicast datagram
+to a particular socket is based on the destination port (or protocol type, for
+raw sockets), just as with unicast datagrams.
+To receive multicast datagrams
+sent to a particular port, it is necessary to bind to that local port,
+leaving the local address unspecified (i.e., INADDR_ANY).
+To receive multicast datagrams
+sent to a particular group and port, bind to the local port, with
+the local address set to the multicast group address.
+Once bound to a multicast address, the socket cannot be used for sending data.
+.PP
+More than one process may bind to the same SOCK_DGRAM UDP port
+or the same multicast group and port if the
+.I bind
+call is preceded by:
+.DS
+int on = 1;
+setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
+.DE
+All processes sharing the port must enable this option.
+Every incoming multicast or broadcast UDP datagram destined to
+the shared port is delivered to all sockets bound to the port.
+For backwards compatibility reasons, this does not apply to incoming
+unicast datagrams. Unicast
+datagrams are never delivered to more than one socket, regardless of
+how many sockets are bound to the datagram's destination port.
+.PP
+A final multicast-related extension is independent of IP: two new ioctls,
+SIOCADDMULTI and SIOCDELMULTI, are available to add or delete link-level
+(e.g., Ethernet) multicast addresses accepted by a particular interface.
+The address to be added or deleted is passed as a sockaddr structure of
+family AF_UNSPEC, within the standard ifreq structure.
+.PP
+These ioctls are
+for the use of protocols other than IP, and require superuser privileges.
+A link-level multicast address added via SIOCADDMULTI is not automatically
+deleted when the socket used to add it goes away; it must be explicitly
+deleted. It is inadvisable to delete a link-level address that may be
+in use by IP.
+.NH 3
+Sample Multicast Program
+.PP
+The following program sends or receives multicast packets.
+If invoked with one argument, it sends a packet containing the current
+time to an arbitrarily-chosen multicast group and UDP port.
+If invoked with no arguments, it receives and prints these packets.
+Start it as a sender on just one host and as a receiver on all the other hosts.
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <time.h>
+#include <stdio.h>
+
+#define EXAMPLE_PORT 60123
+#define EXAMPLE_GROUP "224.0.0.250"
+
+main(argc)
+ int argc;
+{
+ struct sockaddr_in addr;
+ int addrlen, fd, cnt;
+ struct ip_mreq mreq;
+ char message[50];
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ bzero(&addr, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = htons(EXAMPLE_PORT);
+ addrlen = sizeof(addr);
+
+ if (argc > 1) { /* Send */
+ addr.sin_addr.s_addr = inet_addr(EXAMPLE_GROUP);
+ while (1) {
+ time_t t = time(0);
+ sprintf(message, "time is %-24.24s", ctime(&t));
+ cnt = sendto(fd, message, sizeof(message), 0,
+ (struct sockaddr *)&addr, addrlen);
+ if (cnt < 0) {
+ perror("sendto");
+ exit(1);
+ }
+ sleep(5);
+ }
+ } else { /* Receive */
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ perror("bind");
+ exit(1);
+ }
+
+ mreq.imr_multiaddr.s_addr = inet_addr(EXAMPLE_GROUP);
+ mreq.imr_interface.s_addr = htonl(INADDR_ANY);
+ if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+ &mreq, sizeof(mreq)) < 0) {
+ perror("setsockopt mreq");
+ exit(1);
+ }
+
+ while (1) {
+ cnt = recvfrom(fd, message, sizeof(message), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ if (cnt <= 0) {
+ if (cnt == 0) {
+ break;
+ }
+ perror("recvfrom");
+ exit(1);
+ }
+ printf("%s: message = \e"%s\e"\en",
+ inet_ntoa(addr.sin_addr), message);
+ }
+ }
+}
+.DE
+.\"----------------------------------------------------------------------
+.NH 2
+NS Packet Sequences
+.PP
+The semantics of NS connections demand that
+the user both be able to look inside the network header associated
+with any incoming packet and be able to specify what should go
+in certain fields of an outgoing packet.
+Using different calls to \fIsetsockopt\fP, it is possible
+to indicate whether prototype headers will be associated by
+the user with each outgoing packet (SO_HEADERS_ON_OUTPUT),
+to indicate whether the headers received by the system should be
+delivered to the user (SO_HEADERS_ON_INPUT), or to indicate
+default information that should be associated with all
+outgoing packets on a given socket (SO_DEFAULT_HEADERS).
+.PP
+The contents of a SPP header (minus the IDP header) are:
+.DS
+.if t .ta \w" #define"u +\w" u_short"u +2.0i
+struct sphdr {
+ u_char sp_cc; /* connection control */
+#define SP_SP 0x80 /* system packet */
+#define SP_SA 0x40 /* send acknowledgement */
+#define SP_OB 0x20 /* attention (out of band data) */
+#define SP_EM 0x10 /* end of message */
+ u_char sp_dt; /* datastream type */
+ u_short sp_sid; /* source connection identifier */
+ u_short sp_did; /* destination connection identifier */
+ u_short sp_seq; /* sequence number */
+ u_short sp_ack; /* acknowledge number */
+ u_short sp_alo; /* allocation number */
+};
+.DE
+Here, the items of interest are the \fIdatastream type\fP and
+the \fIconnection control\fP fields. The semantics of the
+datastream type are defined by the application(s) in question;
+the value of this field is, by default, zero, but it can be
+used to indicate things such as Xerox's Bulk Data Transfer
+Protocol (in which case it is set to one). The connection control
+field is a mask of the flags defined just below it. The user may
+set or clear the end-of-message bit to indicate
+that a given message is the last of a given substream type,
+or may set/clear the attention bit as an alternate way to
+indicate that a packet should be sent out-of-band.
+As an example, to associate prototype headers with outgoing
+SPP packets, consider:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+#include <netns/sp.h>
+ ...
+struct sockaddr_ns sns, to;
+int s, on = 1;
+struct databuf {
+ struct sphdr proto_spp; /* prototype header */
+ char buf[534]; /* max. possible data by Xerox std. */
+} buf;
+ ...
+s = socket(AF_NS, SOCK_SEQPACKET, 0);
+ ...
+bind(s, (struct sockaddr *) &sns, sizeof (sns));
+setsockopt(s, NSPROTO_SPP, SO_HEADERS_ON_OUTPUT, &on, sizeof(on));
+ ...
+buf.proto_spp.sp_dt = 1; /* bulk data */
+buf.proto_spp.sp_cc = SP_EM; /* end-of-message */
+strcpy(buf.buf, "hello world\en");
+sendto(s, (char *) &buf, sizeof(struct sphdr) + strlen("hello world\en"), 0,
+ (struct sockaddr *) &to, sizeof(to));
+ ...
+.DE
+Note that one must be careful when writing headers; if the prototype
+header is not written with the data with which it is to be associated,
+the kernel will treat the first few bytes of the data as the
+header, with unpredictable results.
+To turn off the above association, and to indicate that packet
+headers received by the system should be passed up to the user,
+one might use:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+#include <netns/sp.h>
+ ...
+struct sockaddr sns;
+int s, on = 1, off = 0;
+ ...
+s = socket(AF_NS, SOCK_SEQPACKET, 0);
+ ...
+bind(s, (struct sockaddr *) &sns, sizeof (sns));
+setsockopt(s, NSPROTO_SPP, SO_HEADERS_ON_OUTPUT, &off, sizeof(off));
+setsockopt(s, NSPROTO_SPP, SO_HEADERS_ON_INPUT, &on, sizeof(on));
+ ...
+.DE
+.PP
+Output is handled somewhat differently in the IDP world.
+The header of an IDP-level packet looks like:
+.DS
+.if t .ta \w'struct 'u +\w" struct ns_addr"u +2.0i
+struct idp {
+ u_short idp_sum; /* Checksum */
+ u_short idp_len; /* Length, in bytes, including header */
+ u_char idp_tc; /* Transport Control (i.e., hop count) */
+ u_char idp_pt; /* Packet Type (i.e., level 2 protocol) */
+ struct ns_addr idp_dna; /* Destination Network Address */
+ struct ns_addr idp_sna; /* Source Network Address */
+};
+.DE
+The primary field of interest in an IDP header is the \fIpacket type\fP
+field. The standard values for this field are (as defined
+in <\fInetns/ns.h\fP>):
+.DS
+.if t .ta \w" #define"u +\w" NSPROTO_ERROR"u +1.0i
+#define NSPROTO_RI 1 /* Routing Information */
+#define NSPROTO_ECHO 2 /* Echo Protocol */
+#define NSPROTO_ERROR 3 /* Error Protocol */
+#define NSPROTO_PE 4 /* Packet Exchange */
+#define NSPROTO_SPP 5 /* Sequenced Packet */
+.DE
+For SPP connections, the contents of this field are
+automatically set to NSPROTO_SPP; for IDP packets,
+this value defaults to zero, which means ``unknown''.
+.PP
+Setting the value of that field with SO_DEFAULT_HEADERS is
+easy:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+#include <netns/idp.h>
+ ...
+struct sockaddr sns;
+struct idp proto_idp; /* prototype header */
+int s, on = 1;
+ ...
+s = socket(AF_NS, SOCK_DGRAM, 0);
+ ...
+bind(s, (struct sockaddr *) &sns, sizeof (sns));
+proto_idp.idp_pt = NSPROTO_PE; /* packet exchange */
+setsockopt(s, NSPROTO_IDP, SO_DEFAULT_HEADERS, (char *) &proto_idp,
+ sizeof(proto_idp));
+ ...
+.DE
+.PP
+Using SO_HEADERS_ON_OUTPUT is somewhat more difficult. When
+SO_HEADERS_ON_OUTPUT is turned on for an IDP socket, the socket
+becomes (for all intents and purposes) a raw socket. In this
+case, all the fields of the prototype header (except the
+length and checksum fields, which are computed by the kernel)
+must be filled in correctly in order for the socket to send and
+receive data in a sensible manner. To be more specific, the
+source address must be set to that of the host sending the
+data; the destination address must be set to that of the
+host for whom the data is intended; the packet type must be
+set to whatever value is desired; and the hopcount must be
+set to some reasonable value (almost always zero). It should
+also be noted that simply sending data using \fIwrite\fP
+will not work unless a \fIconnect\fP or \fIsendto\fP call
+is used, in spite of the fact that it is the destination
+address in the prototype header that is used, not the one
+given in either of those calls. For almost
+all IDP applications, using SO_DEFAULT_HEADERS is easier and
+more desirable than writing headers.
+.NH 2
+Three-way Handshake
+.PP
+The semantics of SPP connections indicate that a three-way
+handshake, involving changes in the datastream type, should \(em
+but is not absolutely required to \(em take place before a SPP
+connection is closed. Almost all SPP connections are
+``well-behaved'' in this manner; when communicating with
+any process, it is best to assume that the three-way handshake
+is required unless it is known for certain that it is not
+required. In a three-way close, the closing process
+indicates that it wishes to close the connection by sending
+a zero-length packet with end-of-message set and with
+datastream type 254. The other side of the connection
+indicates that it is OK to close by sending a zero-length
+packet with end-of-message set and datastream type 255. Finally,
+the closing process replies with a zero-length packet with
+substream type 255; at this point, the connection is considered
+closed. The following code fragments are simplified examples
+of how one might handle this three-way handshake at the user
+level; in the future, support for this type of close will
+probably be provided as part of the C library or as part of
+the kernel. The first code fragment below illustrates how a process
+might handle the three-way handshake if it sees that the process it
+is communicating with wants to close the connection:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+#include <netns/sp.h>
+ ...
+#ifndef SPPSST_END
+#define SPPSST_END 254
+#define SPPSST_ENDREPLY 255
+#endif
+struct sphdr proto_sp;
+int s;
+ ...
+read(s, buf, BUFSIZE);
+if (((struct sphdr *)buf)->sp_dt == SPPSST_END) {
+ /*
+ * SPPSST_END indicates that the other side wants to
+ * close.
+ */
+ proto_sp.sp_dt = SPPSST_ENDREPLY;
+ proto_sp.sp_cc = SP_EM;
+ setsockopt(s, NSPROTO_SPP, SO_DEFAULT_HEADERS, (char *)&proto_sp,
+ sizeof(proto_sp));
+ write(s, buf, 0);
+ /*
+ * Write a zero-length packet with datastream type = SPPSST_ENDREPLY
+ * to indicate that the close is OK with us. The packet that we
+ * don't see (because we don't look for it) is another packet
+ * from the other side of the connection, with SPPSST_ENDREPLY
+ * on it, too. Once that packet is sent, the connection is
+ * considered closed; note that we really ought to retransmit
+ * the close for some time if we do not get a reply.
+ */
+ close(s);
+}
+ ...
+.DE
+To indicate to another process that we would like to close the
+connection, the following code would suffice:
+.DS
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netns/ns.h>
+#include <netns/sp.h>
+ ...
+#ifndef SPPSST_END
+#define SPPSST_END 254
+#define SPPSST_ENDREPLY 255
+#endif
+struct sphdr proto_sp;
+int s;
+ ...
+proto_sp.sp_dt = SPPSST_END;
+proto_sp.sp_cc = SP_EM;
+setsockopt(s, NSPROTO_SPP, SO_DEFAULT_HEADERS, (char *)&proto_sp,
+ sizeof(proto_sp));
+write(s, buf, 0); /* send the end request */
+proto_sp.sp_dt = SPPSST_ENDREPLY;
+setsockopt(s, NSPROTO_SPP, SO_DEFAULT_HEADERS, (char *)&proto_sp,
+ sizeof(proto_sp));
+/*
+ * We assume (perhaps unwisely)
+ * that the other side will send the
+ * ENDREPLY, so we'll just send our final ENDREPLY
+ * as if we'd seen theirs already.
+ */
+write(s, buf, 0);
+close(s);
+ ...
+.DE
+.NH 2
+Packet Exchange
+.PP
+The Xerox standard protocols include a protocol that is both
+reliable and datagram-oriented. This protocol is known as
+Packet Exchange (PEX or PE) and, like SPP, is layered on top
+of IDP. PEX is important for a number of things: Courier
+remote procedure calls may be expedited through the use
+of PEX, and many Xerox servers are located by doing a PEX
+``BroadcastForServers'' operation. Although there is no
+implementation of PEX in the kernel,
+it may be simulated at the user level with some clever coding
+and the use of one peculiar \fIgetsockopt\fP. A PEX packet
+looks like:
+.DS
+.if t .ta \w'struct 'u +\w" struct idp"u +2.0i
+/*
+ * The packet-exchange header shown here is not defined
+ * as part of any of the system include files.
+ */
+struct pex {
+ struct idp p_idp; /* idp header */
+ u_short ph_id[2]; /* unique transaction ID for pex */
+ u_short ph_client; /* client type field for pex */
+};
+.DE
+The \fIph_id\fP field is used to hold a ``unique id'' that
+is used in duplicate suppression; the \fIph_client\fP
+field indicates the PEX client type (similar to the packet
+type field in the IDP header). PEX reliability stems from the
+fact that it is an idempotent (``I send a packet to you, you
+send a packet to me'') protocol. Processes on each side of
+the connection may use the unique id to determine if they have
+seen a given packet before (the unique id field differs on each
+packet sent) so that duplicates may be detected, and to indicate
+which message a given packet is in response to. If a packet with
+a given unique id is sent and no response is received in a given
+amount of time, the packet is retransmitted until it is decided
+that no response will ever be received. To simulate PEX, one
+must be able to generate unique ids -- something that is hard to
+do at the user level with any real guarantee that the id is really
+unique. Therefore, a means (via \fIgetsockopt\fP) has been provided
+for getting unique ids from the kernel. The following code fragment
+indicates how to get a unique id:
+.DS
+long uniqueid;
+int s, idsize = sizeof(uniqueid);
+ ...
+s = socket(AF_NS, SOCK_DGRAM, 0);
+ ...
+/* get id from the kernel -- only on IDP sockets */
+getsockopt(s, NSPROTO_PE, SO_SEQNO, (char *)&uniqueid, &idsize);
+ ...
+.DE
+The retransmission and duplicate suppression code required to
+simulate PEX fully is left as an exercise for the reader.
+.NH 2
+Inetd
+.PP
+One of the daemons provided with 4.4BSD is \fIinetd\fP, the
+so-called ``internet super-server.''
+Having one daemon listen for requests for many daemons
+instead of having each daemon listen for its own requests
+reduces the number of idle daemons and simplifies their implementation.
+.I Inetd
+handles
+two types of services: standard and TCPMUX.
+A standard service has a well-known port assigned to it and
+is listed in
+.I /etc/services
+(see \f2services\f1(5));
+it may be a service that implements an official Internet standard or is a
+BSD-specific service.
+TCPMUX services are nonstandard and do not have a
+well-known port assigned to them.
+They are invoked from
+.I inetd
+when a program connects to the "tcpmux" well-known port and specifies
+the service name.
+This is useful for adding locally-developed servers.
+.PP
+\fIInetd\fP is invoked at boot
+time, and determines from the file \fI/etc/inetd.conf\fP the
+servers for which it is to listen. Once this information has been
+read and a pristine environment created, \fIinetd\fP proceeds
+to create one socket for each service it is to listen for,
+binding the appropriate port number to each socket.
+.PP
+\fIInetd\fP then performs a \fIselect\fP on all these
+sockets for read availability, waiting for somebody wishing
+a connection to the service corresponding to
+that socket. \fIInetd\fP then performs an \fIaccept\fP on
+the socket in question, \fIfork\fPs, \fIdup\fPs the new
+socket to file descriptors 0 and 1 (stdin and
+stdout), closes other open file
+descriptors, and \fIexec\fPs the appropriate server.
+.PP
+Servers making use of \fIinetd\fP are considerably simplified,
+as \fIinetd\fP takes care of the majority of the IPC work
+required in establishing a connection. The server invoked
+by \fIinetd\fP expects the socket connected to its client
+on file descriptors 0 and 1, and may immediately perform
+any operations such as \fIread\fP, \fIwrite\fP, \fIsend\fP,
+or \fIrecv\fP. Indeed, servers may use
+buffered I/O as provided by the ``stdio'' conventions, as
+long as they remember to use \fIfflush\fP when appropriate.
+.PP
+One call which may be of interest to individuals writing
+servers under \fIinetd\fP is the \fIgetpeername\fP call,
+which returns the address of the peer (process) connected
+on the other end of the socket. For example, to log the
+Internet address in ``dot notation'' (e.g., ``128.32.0.4'')
+of a client connected to a server under
+\fIinetd\fP, the following code might be used:
+.DS
+struct sockaddr_in name;
+int namelen = sizeof (name);
+ ...
+if (getpeername(0, (struct sockaddr *)&name, &namelen) < 0) {
+ syslog(LOG_ERR, "getpeername: %m");
+ exit(1);
+} else
+ syslog(LOG_INFO, "Connection from %s", inet_ntoa(name.sin_addr));
+ ...
+.DE
+While the \fIgetpeername\fP call is especially useful when
+writing programs to run with \fIinetd\fP, it can be used
+under other circumstances. Be warned, however, that \fIgetpeername\fP will
+fail on UNIX domain sockets.
+.PP
+Standard TCP
+services are assigned unique well-known port numbers in the range of
+0 to 1023 by the
+Internet Assigned Numbers Authority (IANA@ISI.EDU).
+The limited number of ports in this range are
+assigned to official Internet protocols.
+The TCPMUX service allows you to add
+locally-developed protocols without needing an official TCP port assignment.
+The TCPMUX protocol described in RFC-1078 is simple:
+.QP
+``A TCP client connects to a foreign host on TCP port 1. It sends the
+service name followed by a carriage-return line-feed <CRLF>.
+The service name is never case sensitive.
+The server replies with a
+single character indicating positive ("+") or negative ("\-")
+acknowledgment, immediately followed by an optional message of
+explanation, terminated with a <CRLF>. If the reply was positive,
+the selected protocol begins; otherwise the connection is closed.''
+.LP
+In 4.4BSD, the TCPMUX service is built into
+.IR inetd ,
+that is,
+.IR inetd
+listens on TCP port 1 for requests for TCPMUX services listed
+in \f2inetd.conf\f1.
+.IR inetd (8)
+describes the format of TCPMUX entries for \f2inetd.conf\f1.
+.PP
+The following is an example TCPMUX server and its \f2inetd.conf\f1 entry.
+More sophisticated servers may want to do additional processing
+before returning the positive or negative acknowledgement.
+.DS
+#include <sys/types.h>
+#include <stdio.h>
+
+main()
+{
+ time_t t;
+
+ printf("+Go\er\en");
+ fflush(stdout);
+ time(&t);
+ printf("%d = %s", t, ctime(&t));
+ fflush(stdout);
+}
+.DE
+The \f2inetd.conf\f1 entry is:
+.DS
+tcpmux/current_time stream tcp nowait nobody /d/curtime curtime
+.DE
+Here's the portion of the client code that handles the TCPMUX handshake:
+.DS
+char line[BUFSIZ];
+FILE *fp;
+ ...
+
+/* Use stdio for reading data from the server */
+fp = fdopen(sock, "r");
+if (fp == NULL) {
+ fprintf(stderr, "Can't create file pointer\en");
+ exit(1);
+}
+
+/* Send service request */
+sprintf(line, "%s\er\en", "current_time");
+if (write(sock, line, strlen(line)) < 0) {
+ perror("write");
+ exit(1);
+}
+
+/* Get ACK/NAK response from the server */
+if (fgets(line, sizeof(line), fp) == NULL) {
+ if (feof(fp)) {
+ die();
+ } else {
+ fprintf(stderr, "Error reading response\en");
+ exit(1);
+ }
+}
+
+/* Delete <CR> */
+if ((lp = index(line, '\er')) != NULL) {
+ *lp = '\e0';
+}
+
+switch (line[0]) {
+ case '+':
+ printf("Got ACK: %s\en", &line[1]);
+ break;
+ case '-':
+ printf("Got NAK: %s\en", &line[1]);
+ exit(0);
+ default:
+ printf("Got unknown response: %s\en", line);
+ exit(1);
+}
+
+/* Get rest of data from the server */
+while ((fgets(line, sizeof(line), fp)) != NULL) {
+ fputs(line, stdout);
+}
+.DE
diff --git a/share/doc/psd/21.ipc/Makefile b/share/doc/psd/21.ipc/Makefile
new file mode 100644
index 0000000..2a366be
--- /dev/null
+++ b/share/doc/psd/21.ipc/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= psd/21.ipc
+SRCS= 0.t 1.t 2.t 3.t 4.t 5.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/psd/21.ipc/spell.ok b/share/doc/psd/21.ipc/spell.ok
new file mode 100644
index 0000000..02b45d4
--- /dev/null
+++ b/share/doc/psd/21.ipc/spell.ok
@@ -0,0 +1,347 @@
+4.2bsd
+AF
+ANYP
+BUFSIZ
+BUFSIZE
+BroadcastForServers
+CF
+CLR
+CRMOD
+Clearinghouse
+DARPA
+DESTPORT
+DGRAM
+DONTROUTE
+Datagram
+EADDRINUSE
+EADDRNOTAVAIL
+EAGAIN
+ECONNREFUSED
+EHOSTDOWN
+EHOSTUNREACH
+EINTR
+ENDREPLY
+ENETDOWN
+ENETUNREACH
+ENOBUFS
+EPROTONOSUPPORT
+EPROTOTYPE
+ETIMEDOUT
+EWOULDBLOCK
+Ethernet
+FASYNC
+FCREATE
+FD
+FNDELAY
+FTP
+FTRUNCATE
+FWRITE
+FWRONLY
+Fabry
+GETOWN
+Gethostybyname
+IDP
+IFF
+IFNAMSIZ
+INADDR
+INET
+INFO
+IP
+IPC
+IPPORT
+ISSET
+Inetd
+LF
+LH
+LOOPBACK
+Lapsley
+Leffler
+MSG
+MYADDRESS
+MYPORT
+NS
+NSPROTO
+OB
+OOB
+OOBINLINE
+Optlen
+Optval
+PE
+PEX
+POINTTOPOINT
+PS1:8
+RDONLY
+RDWR
+REUSEADDR
+RF
+RH
+RWHODIR
+SEQNO
+SEQPACKET
+SETFL
+SETOWN
+SETSIZE
+SIGALRM
+SIGCHLD
+SIGIO
+SIGURG
+SIOCATMARK
+SIOCGIFBRDADDR
+SIOCGIFCONF
+SIOCGIFDSTADDR
+SIOCGIFFLAGS
+SIOCGPGRP
+SIOCSPGRP
+SOF
+SP
+SPP
+SPPSST
+Science:UofMaryland
+TCP
+TELNET
+TIOCFLUSH
+TIOCGETP
+TIOCNOTTY
+TIOCSETP
+TRUNC
+Torek
+Tutorial''PS1:8
+USERRESERVED
+VAX
+WNOHANG
+WRONLY
+XSIS
+XTABS
+ack
+addr
+addr.s
+addr.sa
+addr.sun
+addr.x
+addrtype
+alo
+argc
+argv
+arpa
+b.sg
+bcmp
+bcopy
+broadaddr
+buf
+buf.buf
+buf.proto
+buflen
+bzero
+c.f
+cad
+caddr
+calder
+daemons
+dali
+databuf
+datagram
+datastream
+dev
+dna
+doit
+dst
+dst.sin
+dst.sns
+dstaddr
+dt
+dup2
+en0
+endhostent
+endif
+ernie
+errno
+es
+esvax
+exceptmask
+execptfds
+fcntl
+fcntl.h
+fd
+fflush
+file.h
+foo
+fprintf
+from.sin
+fromlen
+gethostbyaddr
+gethostbyname
+gethostbynameandnet
+gethostent
+gethostname
+getnetbyname
+getnetbynumber
+getnetent
+getpeername
+getprotobyname
+getprotobynumber
+getprotoent
+getservbyname
+getservbyport
+getservent
+getsockopt
+goto
+gotpty
+gyre
+gyre:Computer
+hardcoding
+hopcount
+host.c
+hostent
+hostname
+hostnames
+hosts.equiv
+htonl
+htons
+idp
+idp.h
+idp.idp
+idsize
+if.h
+ifc
+ifc.ifc
+ifconf
+ifcu
+ifcu.ifcu
+ifndef
+ifr
+ifreq
+ifru
+ifru.ifru
+in.h
+inet
+inetd
+inetd.conf
+ing
+ingres
+io
+ioctl.h
+ipc
+kim
+len
+localnet
+lport
+lq
+makeaddr
+matisse
+medea
+miro
+monet
+name.sin
+namelen
+nameserver
+nb
+netdb.h
+netent
+netinet
+netns
+netnum
+netof
+newsock
+newtcp
+nfds
+ns
+ns.h
+ntoa
+ntohl
+ntohs
+onalrm
+oob
+optlen
+optname
+optval
+oz
+pathname
+pathnames
+pex
+pgrp
+ph
+pp
+proto
+protoent
+pt
+pty
+ptyXX
+ptyp
+ptyxy
+queueing
+readfds
+readmask
+recv
+recvfrom
+recvtime
+rem
+req
+rhosts
+rlogin
+rlogind
+rq
+rresvport
+ruptime
+rwho
+rwhod
+sendto
+servent
+server.sin
+server.sun
+sethostent
+setsockopt
+sid
+sigvec
+sin.sin
+sizeof
+sna
+snew
+sns
+sns.sns
+sockaddr
+socket.h
+sp
+sp.h
+sp.sp
+sphdr
+spp
+spp.sp
+sprintf
+statbuf
+statvax
+std
+stderr
+stdin
+stdio.h
+stdout
+strcmp
+strcpy
+strlen
+syslog
+ta
+tcp
+telnet
+time.h
+timeval
+tmp
+tolen
+ttyxy
+tuples
+types.h
+ucbvax
+udp
+un
+un.h
+uniqueid
+useable
+usec
+val
+wait.h
+wait.tv
+wd
+wd.wd
+whod
+wildcard
+wildcarded
+writefds
+writemask
diff --git a/share/doc/psd/Makefile b/share/doc/psd/Makefile
new file mode 100644
index 0000000..6045b22
--- /dev/null
+++ b/share/doc/psd/Makefile
@@ -0,0 +1,22 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+# The following modules do not build/install:
+# 10.gdb, 13.rcs
+
+BINDIR= /usr/share/doc/psd
+FILES= 00.contents Makefile Title
+SUBDIR= 01.cacm 02.implement 03.iosys 04.uprog 05.sysman 06.Clang \
+ 07.pascal 08.f77 09.f77io 11.adb 12.make 14.sccs 15.yacc \
+ 16.lex 17.m4 18.gprof 19.curses 20.ipctut 21.ipc
+
+Title.ps: ${FILES}
+ groff Title > ${.TARGET}
+
+contents.ps: ${FILES}
+ groff -ms 00.contents > ${.TARGET}
+
+beforeinstall:
+ install -c -o ${BINOWN} -g ${BINGRP} -m 444 ${FILES} \
+ ${DESTDIR}${BINDIR}
+
+.include <bsd.subdir.mk>
diff --git a/share/doc/psd/Title b/share/doc/psd/Title
new file mode 100644
index 0000000..2ec146c
--- /dev/null
+++ b/share/doc/psd/Title
@@ -0,0 +1,131 @@
+.\" Copyright (c) 1986, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)Title 8.2 (Berkeley) 4/19/94
+.\"
+.ps 18
+.vs 22
+.sp 2.75i
+.ft B
+.ce 3
+UNIX Programmer's Supplementary Documents
+(PSD)
+.ps 14
+.vs 16
+.sp |4i
+.ce 2
+4.4 Berkeley Software Distribution
+.sp |5.75i
+.ft R
+.ps 12
+.vs 16
+.ce
+June, 1993
+.sp |8.2i
+.ce 5
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California
+Berkeley, California 94720
+.bp
+\&
+.sp |1i
+.hy 0
+.ps 10
+.vs 12p
+Copyright 1979, 1980, 1983, 1986, 1993
+The Regents of the University of California. All rights reserved.
+.sp 2
+Other than the specific documents listed below as copyrighted by AT&T,
+redistribution and use of this manual in source and binary forms,
+with or without modification, are permitted provided that the
+following conditions are met:
+.sp 0.5
+.in +0.2i
+.ta 0.2i
+.ti -0.2i
+1) Redistributions of this manual must retain the copyright
+notices on this page, this list of conditions and the following disclaimer.
+.ti -0.2i
+2) Software or documentation that incorporates part of this manual must
+reproduce the copyright notices on this page, this list of conditions and
+the following disclaimer in the documentation and/or other materials
+provided with the distribution.
+.ti -0.2i
+3) All advertising materials mentioning features or use of this software
+must display the following acknowledgement:
+``This product includes software developed by the University of
+California, Berkeley and its contributors.''
+.ti -0.2i
+4) Neither the name of the University nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+.in -0.2i
+.sp
+\fB\s-1THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.\s+1\fP
+.sp 2
+Documents PSD:1, 2, 3, 4, 6, 11, 15, 16, and 17
+are copyright 1979, AT&T Bell Laboratories, Incorporated.
+Document PSD:8 is a modification of an earlier document that
+is copyrighted 1979 by AT&T Bell Laboratories, Incorporated.
+Holders of \x'-1p'UNIX\v'-4p'\s-3TM\s0\v'4p'/32V,
+System III, or System V software licenses are
+permitted to copy these documents, or any portion of them,
+as necessary for licensed use of the software,
+provided this copyright notice and statement of permission
+are included.
+.sp 2
+Document PSD:10 is part of the user contributed software and is
+copyright 1992 by the Free Software Foundation, Inc.
+Permission is granted to make and distribute verbatim copies of
+this document provided the copyright notice and this permission notice
+are preserved on all copies.
+.sp 2
+Document PSD:13 is part of the user contributed software and is
+copyright 1983 by Walter F. Tichy.
+Permission to copy the RCS documentation or any portion thereof as
+necessary for licensed use of the software is granted to licensees
+of this software, provided this copyright notice is included.
+.sp 2
+The views and conclusions contained in this manual are those of the
+authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of the University of California.
diff --git a/share/doc/smm/00.contents b/share/doc/smm/00.contents
new file mode 100644
index 0000000..ed03c7a
--- /dev/null
+++ b/share/doc/smm/00.contents
@@ -0,0 +1,161 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)00.contents 8.1 (Berkeley) 7/5/93
+.\"
+.OH '''SMM Contents'
+.EH 'SMM Contents'''
+.TL
+UNIX System Manager's Manual (SMM)
+.sp
+\s-24.4 Berkeley Software Distribution\s+2
+.sp
+\fRJune, 1993\fR
+.PP
+This volume contains manual pages and supplementary documents useful to system
+administrators.
+The information in these documents applies to
+the 4.4BSD system as distributed by U.C. Berkeley.
+.SH
+Reference Manual \- Section 8
+.tl '''(8)'
+.IP
+Section 8 of the UNIX Programmer's Manual contains information related to
+system operation, administration, and maintenance.
+.SH
+System Installation and Administration
+.IP
+.tl 'Installing and Operating 4.4BSD''SMM:1'
+.QP
+The definitive reference document for those occasions when
+you find you need to start over again.
+
+.IP
+.tl 'Building 4.4BSD Kernels with \fIConfig\fP''SMM:2'
+.QP
+In-depth discussions of the use and operation of the \fIconfig\fP
+program, and how to build your very own Unix kernel.
+
+.IP
+.tl 'Fsck \- The UNIX File System Check Program''SMM:3'
+.QP
+A reference document for using the \fIfsck\fP program during
+times of file system distress.
+
+.IP
+.tl 'Disc Quotas in a UNIX Environment''SMM:4'
+.QP
+A light introduction to the techniques
+for limiting the use of disc resources.
+
+.IP
+.tl 'A Fast File System for UNIX''SMM:5'
+.QP
+A description of the 4.4BSD file system organization,
+design and implementation.
+
+.IP
+.tl 'The 4.4BSD NFS Implementation''SMM:6'
+.QP
+An overview of the design, implementation, and use of NFS on 4.4BSD.
+
+.IP
+.tl 'Line Printer Spooler Manual''SMM:7'
+.QP
+This document describes the structure and installation procedure
+for the line printer spooling system.
+
+.IP
+.tl 'Sendmail Installation and Operation Guide''SMM:8'
+.QP
+The last word in installing and operating the \fIsendmail\fP program.
+
+.ne 3
+.IP
+.tl 'Sendmail \- An Internetwork Mail Router''SMM:9'
+.QP
+An overview document on the design and implementation of \fIsendmail\fP.
+
+.IP
+.tl 'Name Server Operations Guide for BIND''SMM:10'
+.QP
+Setting up and operating the name to Internet addressing software.
+If you have a network this will be of interest.
+
+.IP
+.tl 'Timed Installation and Operation Guide''SMM:11'
+.QP
+Describes how to maintain time synchronization between machines
+in a local network.
+
+.IP
+.tl 'The Berkeley UNIX Time Synchronization Protocol''SMM:12'
+.QP
+The protocols and algorithms used by timed,
+the network time synchronization daemon.
+
+.IP
+.tl 'AMD \- The 4.4BSD Automounter''SMM:13'
+.QP
+Automatically mounting file systems on demand.
+
+.IP
+.tl 'Installation and Operation of UUCP''SMM:14'
+.QP
+Describes the implementation of uucp; for the installer and administrator.
+
+.IP
+.tl 'A Dial\-Up Network of UNIX Systems''SMM:15'
+.QP
+Describes UUCP, a program for communicating files between UNIX systems.
+
+.IP
+.tl 'On the Security of UNIX''SMM:16'
+.QP
+Hints on how to break UNIX, and how to avoid your system being broken.
+
+.IP
+.tl 'Password Security \- A Case History''SMM:17'
+.QP
+How the bad guys used to be able to break the password algorithm, and why
+they cannot now (at least not so easily).
+
+.IP
+.tl 'Networking Implementation Notes, 4.4BSD Edition''SMM:18'
+.QP
+A concise description of the system interfaces used within the
+networking subsystem.
+
+.IP
+.tl 'The PERL Programming Language''SMM:19'
+.QP
+The Practical Extraction and Report Language is ideal for
+writing those pesky administration scripts.
diff --git a/share/doc/smm/01.setup/0.t b/share/doc/smm/01.setup/0.t
new file mode 100644
index 0000000..2f880eb
--- /dev/null
+++ b/share/doc/smm/01.setup/0.t
@@ -0,0 +1,131 @@
+.\" Copyright (c) 1988, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds Ux \s-1UNIX\s0
+.ds Bs \s-1BSD\s0
+.\" Current version:
+.ds 4B 4.4\*(Bs
+.ds Ps 4.3\*(Bs
+.\" tape and disk naming
+.ds Mt mt
+.ds Dk sd
+.ds Dn disk
+.ds Pa c
+.\" block size used on the tape
+.ds Bb 10240
+.ds Bz 20
+.\" document date
+.ds Dy July 27, 1993
+.de Sm
+\s-1\\$1\s0\\$2
+..
+.de Pn \" pathname
+\f(CW\\$1\fP\\$2
+..
+.de Li \" literal
+\f(CW\\$1\fP\\$2
+..
+.de I \" italicize first arg
+\fI\\$1\fP\^\\$2
+..
+.de Xr \" manual reference
+\fI\\$1\fP\^\\$2
+..
+.de Fn \" function
+\fI\\$1\fP\^()\\$2
+..
+.bd S B 3
+.EH 'SMM:1-%''Installing and Operating \*(4B UNIX'
+.OH 'Installing and Operating \*(4B UNIX''SMM:1-%'
+.de Sh
+.NH \\$1
+\\$2
+.nr PD .1v
+.XS \\n%
+.ta 0.6i
+\\*(SN \\$2
+.XE
+.nr PD .3v
+..
+.TL
+Installing and Operating \*(4B UNIX
+.br
+\*(Dy
+.AU
+Marshall Kirk McKusick
+.AU
+Keith Bostic
+.AU
+Michael J. Karels
+.AU
+Samuel J. Leffler
+.AI
+Computer Systems Research Group
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, California 94720
+(415) 642-7780
+.AU
+Mike Hibler
+.AI
+Center for Software Science
+Department of Computer Science
+University of Utah
+Salt Lake City, Utah 84112
+(801) 581-5017
+.AB
+.PP
+This document contains instructions for the
+installation and operation of the
+\*(4B release of UNIX\**
+as distributed by The University of California at Berkeley.
+.FS
+UNIX is a registered trademark of USL in the USA and some other countries.
+.FE
+.PP
+It discusses procedures for installing UNIX on a new machine,
+and for upgrading an existing \*(Ps UNIX system to the new release.
+An explanation of how to lay out filesystems on available disks
+and the space requirements for various parts of the system are given.
+A brief overview of the major changes to
+the system between \*(Ps and \*(4B is given.
+An explanation of how to set up terminal lines and user accounts,
+and how to do system-specific tailoring is provided.
+A description of how to install and configure the \*(4B networking
+facilities is included.
+Finally, the document details system operation procedures:
+shutdown and startup, filesystem backup procedures,
+resource control, performance monitoring, and procedures for recompiling
+and reinstalling system software.
+.AE
+.bp +3
diff --git a/share/doc/smm/01.setup/1.t b/share/doc/smm/01.setup/1.t
new file mode 100644
index 0000000..2f71b77
--- /dev/null
+++ b/share/doc/smm/01.setup/1.t
@@ -0,0 +1,172 @@
+.\" Copyright (c) 1988, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds lq ``
+.ds rq ''
+.ds LH "Installing/Operating \*(4B
+.ds RH Introduction
+.ds CF \*(Dy
+.LP
+.bp
+.Sh 1 "Introduction"
+.PP
+This document explains how to install the \*(4B Berkeley
+version of UNIX on your system.
+The filesystem format is compatible with \*(Ps
+and it will only be necessary for you to do a full bootstrap
+procedure if you are installing the release on a new machine.
+The object file formats are completely different from the System
+V release, so the most straightforward procedure for upgrading
+a System V system is to do a full bootstrap.
+.PP
+The full bootstrap procedure
+is outlined in section 2; the process starts with copying a filesystem
+image onto a new disk.
+This filesystem is then booted and used to extract the remainder of the
+system binaries and sources from the archives on the tape(s).
+.PP
+The technique for upgrading a \*(Ps system is described
+in section 3 of this document.
+The upgrade procedure involves extracting system binaries
+onto new root and
+.Pn /usr
+filesystems and merging local
+configuration files into the new system.
+User filesystems may be upgraded in place.
+Most \*(Ps binaries may be used with \*(4B in the course
+of the conversion.
+It is desirable to recompile local sources after the conversion,
+as the new compiler (GCC) provides superior code optimization.
+Consult section 3.5 for a description of some of the differences
+between \*(Ps and \*(4B.
+.Sh 2 "Distribution format"
+.PP
+The distribution comes in two formats:
+.DS
+(3)\0\0 6250bpi 2400' 9-track magnetic tapes, or
+(1)\0\0 8mm Exabyte tape
+.DE
+.PP
+If you have the facilities, we \fBstrongly\fP recommend copying the
+magnetic tape(s) in the distribution kit to guard against disaster.
+The tapes contain \*(Bb-byte records.
+There are interspersed tape marks;
+end-of-tape is signaled by a double end-of-file.
+The first file on the tape is architecture dependent.
+Additional files on the tape(s)
+contain tape archive images of the system binaries and sources (see
+.Xr tar (1)\**).
+.FS
+References of the form \fIX\fP(Y) mean the entry named
+\fIX\fP in section Y of the ``UNIX Programmer's Manual''.
+.FE
+See the tape label for a description of the contents
+and format of each individual tape.
+.Sh 2 "UNIX device naming"
+.PP
+Device names have a different syntax depending on whether you are talking
+to the standalone system or a running UNIX kernel.
+The standalone system syntax is currently architecture dependent and is
+described in the various architecture specific sections as applicable.
+When not running standalone, devices are available via files in the
+.Pn /dev/
+directory.
+The file name typically encodes the device type, its logical unit and
+a partition within that unit.
+For example,
+.Pn /dev/sd2b
+refers to the second partition (``b'') of
+SCSI (``sd'') drive number ``2'', while
+.Pn /dev/rmt0
+refers to the raw (``r'') interface of 9-track tape (``mt'') unit ``0''.
+.PP
+The mapping of physical addressing information (e.g. controller, target)
+to a logical unit number is dependent on the system configuration.
+In all simple cases, where only a single controller is present, a drive
+with physical unit number 0 (e.g., as determined by its unit
+specification, either unit plug or other selection mechanism)
+will be called unit 0 in its UNIX file name.
+This is not, however, strictly
+necessary, since the system has a level of indirection in this naming.
+If there are multiple controllers, the disk unit numbers will normally
+be counted sequentially across controllers. This can be taken
+advantage of to make the system less dependent on the interconnect
+topology, and to make reconfiguration after hardware failure easier.
+.PP
+Each UNIX physical disk is divided into at most 8 logical disk partitions,
+each of which may occupy any consecutive cylinder range on the physical
+device. The cylinders occupied by the 8 partitions for each drive type
+are specified initially in the disk description file
+.Pn /etc/disktab
+(cf.
+.Xr disktab (5)).
+The partition information and description of the
+drive geometry are written in one of the first sectors of each disk with the
+.Xr disklabel (8)
+program. Each partition may be used for either a
+raw data area such as a paging area or to store a UNIX filesystem.
+It is conventional for the first partition on a disk to be used
+to store a root filesystem, from which UNIX may be bootstrapped.
+The second partition is traditionally used as a paging area, and the
+rest of the disk is divided into spaces for additional ``mounted
+filesystems'' by use of one or more additional partitions.
+.Sh 2 "UNIX devices: block and raw"
+.PP
+UNIX makes a distinction between ``block'' and ``raw'' (character)
+devices. Each disk has a block device interface where
+the system makes the device byte addressable and you can write
+a single byte in the middle of the disk. The system will read
+out the data from the disk sector, insert the byte you gave it
+and put the modified data back. The disks with the names
+.Pn /dev/xx0[a-h] ,
+etc., are block devices.
+There are also raw devices available.
+These have names like
+.Pn /dev/rxx0[a-h] ,
+the ``r'' here standing for ``raw''.
+Raw devices bypass the buffer cache and use DMA directly to/from
+the program's I/O buffers;
+they are normally restricted to full-sector transfers.
+In the bootstrap procedures we
+will often suggest using the raw devices, because these tend
+to work faster.
+Raw devices are used when making new filesystems,
+when checking unmounted filesystems,
+or for copying quiescent filesystems.
+The block devices are used to mount filesystems.
+.PP
+You should be aware that it is sometimes important whether to use
+the character device (for efficiency) or not (because it would not
+work, e.g. to write a single byte in the middle of a sector).
+Do not change the instructions by using the wrong type of device
+indiscriminately.
diff --git a/share/doc/smm/01.setup/2.t b/share/doc/smm/01.setup/2.t
new file mode 100644
index 0000000..ddc4f7e
--- /dev/null
+++ b/share/doc/smm/01.setup/2.t
@@ -0,0 +1,1658 @@
+.\" Copyright (c) 1988, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds lq ``
+.ds rq ''
+.ds LH "Installing/Operating \*(4B
+.ds RH Bootstrapping
+.ds CF \*(Dy
+.Sh 1 "Bootstrap procedure"
+.PP
+This section explains the bootstrap procedure that can be used
+to get the kernel supplied with this distribution running on your machine.
+If you are not currently running \*(Ps you will
+have to do a full bootstrap.
+Section 3 describes how to upgrade a \*(Ps system.
+An understanding of the operations used in a full bootstrap
+is helpful in doing an upgrade as well.
+In either case, it is highly desirable to read and understand
+the remainder of this document before proceeding.
+.PP
+The distribution supports a somewhat wider set of machines than
+those for which we have built binaries.
+The architectures that are supported only in source form include:
+.IP \(bu
+Intel 386/486-based machines (ISA/AT or EISA bus only)
+.IP \(bu
+Sony News MIPS-based workstations
+.IP \(bu
+Omron Luna 68000-based workstations
+.LP
+If you wish to run one of these architectures,
+you will have to build a cross compilation environment.
+Note that the distribution does
+.B not
+include the machine support for the Tahoe and VAX architectures
+found in previous BSD distributions.
+Our primary development environment is the HP9000/300 series machines.
+The other architectures are developed and supported by
+people outside the university.
+Consequently, we are not able to directly test or maintain these
+other architectures, so cannot comment on their robustness,
+reliability, or completeness.
+.Sh 2 "Bootstrapping from the tape"
+.LP
+The set of files on the distribution tape is as follows:
+.IP 1)
+A
+.Xr dd (1)
+(HP300),
+.Xr tar (1)
+(DECstation), or
+.Xr dump (8)
+(SPARC) image of the root filesystem
+.IP 2)
+A
+.Xr tar
+image of the
+.Pn /var
+filesystem
+.IP 3)
+A
+.Xr tar
+image of the
+.Pn /usr
+filesystem
+.IP 4)
+A
+.Xr tar
+image of
+.Pn /usr/src/sys
+.IP 5)
+A
+.Xr tar
+image of
+.Pn /usr/src
+except sys and contrib
+.IP 6)
+A
+.Xr tar
+image of
+.Pn /usr/src/contrib
+.IP 7)
+(8mm Exabyte tape distributions only)
+A
+.Xr tar
+image of
+.Pn /usr/src/X11R5
+.LP
+The tape bootstrap procedure used to create a
+working system involves the following major steps:
+.IP 1)
+Transfer a bootable root filesystem from the tape to a disk
+and get it booted and running.
+.IP 2)
+Build and restore the
+.Pn /var
+and
+.Pn /usr
+filesystems from tape with
+.Xr tar (1).
+.IP 3)
+Extract the system and utility source files as desired.
+.PP
+The following sections describe the above steps in detail.
+The details of the first step vary between architectures.
+The specific steps for the HP300, SPARC, and DECstation are
+given in the next three sections respectively.
+You should follow the instructions for your particular architecture.
+In all sections,
+commands you are expected to type are shown in italics, while that
+information printed by the system is shown emboldened.
+.Sh 2 "Booting the HP300"
+.Sh 3 "Supported hardware"
+.LP
+The hardware supported by \*(4B for the HP300/400 is as follows:
+.TS
+center box;
+lw(1i) lw(4i).
+CPU's T{
+68020 based (318, 319, 320, 330 and 350),
+68030 based (340, 345, 360, 370, 375, 400) and
+68040 based (380, 425, 433).
+T}
+_
+DISK's T{
+HP-IB/CS80 (7912, 7914, 7933, 7936, 7945, 7957, 7958, 7959, 2200, 2203)
+and SCSI-I (including magneto-optical).
+T}
+_
+TAPE's T{
+Low-density CS80 cartridge (7914, 7946, 9144),
+high-density CS80 cartridge (9145),
+HP SCSI DAT and
+SCSI Exabyte.
+T}
+_
+RS232 T{
+98644 built-in single-port, 98642 4-port and 98638 8-port interfaces.
+T}
+_
+NETWORK T{
+98643 internal and external LAN cards.
+T}
+_
+GRAPHICS T{
+Terminal emulation and raw frame buffer support for
+98544 / 98545 / 98547 (Topcat color & monochrome),
+98548 / 98549 / 98550 (Catseye color & monochrome),
+98700 / 98710 (Gatorbox),
+98720 / 98721 (Renaissance),
+98730 / 98731 (DaVinci) and
+A1096A (Hyperion monochrome).
+T}
+_
+INPUT T{
+General interface supporting all HIL devices
+(e.g. keyboard, 2 and 3 button mice, ID module, ...).
+T}
+_
+MISC T{
+Battery-backed real time clock,
+builtin and 98625A/B HP-IB interfaces,
+builtin and 98658A SCSI interfaces,
+serial printers and plotters on HP-IB,
+and SCSI autochanger device.
+T}
+.TE
+.LP
+Major items that are not supported
+include the 310 and 332 CPU's, 400 series machines
+configured for Domain/OS, EISA and VME bus adaptors, audio, the centronics
+port, 1/2" tape drives (7980), CD-ROM, and the PVRX/TVRX 3D graphics displays.
+.Sh 3 "Standalone device file naming"
+.LP
+The standalone system device name syntax on the HP300 is of the form:
+.DS
+xx(a,c,u,p)
+.DE
+where
+\fIxx\fP is the device type,
+\fIa\fP specifies the adaptor to use,
+\fIc\fP the controller,
+\fIu\fP the unit, and
+\fIp\fP a partition.
+The \fIdevice type\fP differentiates the various disks and tapes and is one of:
+``rd'' for HP-IB CS80 disks,
+``ct'' for HP-IB CS80 cartridge tapes, or
+``sd'' for SCSI-I disks
+(SCSI-I tapes are currently not supported).
+The \fIadaptor\fP field is a logical HP-IB or SCSI bus adaptor card number.
+This will typically be
+0 for SCSI disks,
+0 for devices on the ``slow'' HP-IB interface (usually tapes) and
+1 for devices on the ``fast'' HP-IB interface (usually disks).
+To get a complete mapping of physical (select-code) to logical card numbers
+just type a ^C at the standalone prompt.
+The \fIcontroller\fP field is the disk or tape's target number on the
+HP-IB or SCSI bus.
+For SCSI the range is 0 to 6 (7 is the adaptor address) and
+for HP-IB the range is 0 to 7.
+The \fIunit\fP field is unused and should be 0.
+The \fIpartition\fP field is interpreted differently for tapes
+and disks: for disks it is a disk partition (in the range 0-7),
+and for tapes it is a file number offset on the tape.
+Thus, partition 2 of a SCSI disk drive at target 3 on SCSI bus 1
+would be ``sd(1,3,0,2)''.
+If you have only one of any type bus adaptor, you may omit the adaptor
+and controller numbers;
+e.g. ``sd(0,2)'' could be used instead of ``sd(0,0,0,2)''.
+The following examples always use the full syntax for clarity.
+.Sh 3 "The procedure"
+.LP
+The basic steps involved in bringing up the HP300 are as follows:
+.IP 1)
+Obtain a second disk and format it, if necessary.
+.IP 2)
+Copy a root filesystem from the
+tape onto the beginning of the disk.
+.IP 3)
+Boot the UNIX system on the new disk.
+.IP 4)
+(Optional) Build a root filesystem optimized for your disk.
+.IP 5)
+Label the disks with the
+.Xr disklabel (8)
+program.
+.Sh 4 "Step 1: selecting and formatting a disk"
+.PP
+For your first system you will have to obtain a formatted disk
+of a type given in the ``supported hardware'' list above.
+If you want to load an entire binary system
+(i.e., everything except
+.Pn /usr/src ),
+on the single disk you will need a minimum of 290MB,
+ruling out anything smaller than a 7959B/S disk.
+The disklabel included in the bootstrap root image is laid out
+to accommodate this scenario.
+Note that an HP SCSI magneto-optical disk will work fine for this case.
+\*(4B will boot and run (albeit slowly) using one.
+If you want to load source on a single disk system,
+you will need at least 640MB (at least a 2213A SCSI or 2203A HP-IB disk).
+A disk as small as the 7945A (54MB) can be used for the bootstrap
+procedure but will hold only the root and primary swap partitions.
+If you plan to use multiple disks,
+refer to section 2.5 for suggestions on partitioning.
+.PP
+After selecting a disk, you may need to format it.
+Since most HP disk drives come pre-formatted
+(except optical media)
+you probably will not, but if necessary,
+you can format a disk under HP-UX using the
+.Xr mediainit (1m)
+program.
+Once you have \*(4B up and running on one machine you can use the
+.Xr scsiformat (8)
+program to format additional SCSI disks.
+Any additional HP-IB disks will have to be formatted using HP-UX.
+.Sh 4 "Step 2: copying the root filesystem from tape to disk"
+.PP
+Once you have a formatted second disk you can use the
+.Xr dd (1)
+command under HP-UX to copy the root filesystem image from
+the tape to the beginning of the second disk.
+For HP's, the root filesystem image is the first file on the tape.
+It includes a disklabel and bootblock along with the root filesystem.
+An example command to copy the image from tape to the beginning of a disk is:
+.DS
+.ft CW
+dd if=/dev/rmt/0m of=/dev/rdsk/1s0 bs=\*(Bzb
+.DE
+The actual special file syntax may vary depending on unit numbers and
+the version of HP-UX that is running.
+Consult the HP-UX
+.Xr mt (7)
+and
+.Xr disk (7)
+man pages for details.
+.PP
+Note that if you have a SCSI disk, you don't necessarily have to use
+HP-UX (or an HP) to create the boot disk.
+Any machine and operating system that will allow you to copy the
+raw disk image out to block 0 of the disk will do.
+.PP
+If you have only a single machine with a single disk,
+you may still be able to install and boot \*(4B if you have an
+HP-IB cartridge tape drive.
+If so, you can use a more difficult approach of booting a
+standalone copy program from the tape, and using that to copy the
+root filesystem image from the tape to the disk.
+To do this, you need to extract the first file of the distribution tape
+(the root image), copy it over to a machine with a cartridge drive
+and then copy the image onto tape.
+For example:
+.DS
+.ft CW
+dd if=/dev/rst0 of=bootimage bs=\*(Bzb
+rcp bootimage foo:/tmp/bootimage
+<login to foo>
+dd if=/tmp/bootimage of=/dev/rct/0m bs=\*(Bzb
+.DE
+Once this tape is created you can boot and run the standalone tape
+copy program from it.
+The copy program is loaded just as any other program would be loaded
+by the bootrom in ``attended'' mode:
+reset the CPU,
+hold down the space bar until the word ``Keyboard'' appears in the
+installed interface list, and
+enter the menu selection for SYS_TCOPY.
+Once loaded and running:
+.DS
+.TS
+lw(2i) l.
+\fBFrom:\fP \fI^C\fP (control-C to see logical adaptor assignments)
+\fBhpib0 at sc7\fP
+\fBscsi0 at sc14\fP
+\fBFrom:\fP \fIct(0,7,0,0)\fP (HP-IB tape, target 7, first tape file)
+\fBTo:\fP \fIsd(0,0,0,2)\fP (SCSI disk, target 0, third partition)
+\fBCopy completed: 1728 records copied\fP
+.TE
+.DE
+.LP
+This copy will likely take 30 minutes or more.
+.Sh 4 "Step 3: booting the root filesystem"
+.PP
+You now have a bootable root filesystem on the disk.
+If you were previously running with two disks,
+it would be best if you shut down the machine and turn off power on
+the HP-UX drive.
+It will be less confusing and it will eliminate any chance of accidentally
+destroying the HP-UX disk.
+If you used a cartridge tape for booting you should also unload the tape
+at this point.
+Whether you booted from tape or copied from disk you should now reboot
+the machine and do another attended boot (see previous section),
+this time with SYS_TBOOT.
+Once loaded and running the boot program will display the CPU type and
+prompt for a kernel file to boot:
+.DS
+.B
+HP433 CPU
+Boot
+.R
+\fB:\fP \fI/vmunix\fP
+.DE
+.LP
+After you provide the kernel name, the machine will boot \*(4B with
+output that looks about like this:
+.DS
+.B
+597480+34120+139288 start 0xfe8019ec
+Copyright (c) 1982, 1986, 1989, 1991, 1993
+ The Regents of the University of California.
+Copyright (c) 1992 Hewlett-Packard Company
+Copyright (c) 1992 Motorola Inc.
+All rights reserved.
+
+4.4BSD UNIX #1: Tue Jul 20 11:40:36 PDT 1993
+ mckusick@vangogh.CS.Berkeley.EDU:/usr/obj/sys/compile/GENERIC.hp300
+HP9000/433 (33MHz MC68040 CPU+MMU+FPU, 4k on-chip physical I/D caches)
+real mem = xxx
+avail mem = ###
+using ### buffers containing ### bytes of memory
+(... information about available devices ...)
+root device?
+.R
+.DE
+.PP
+The first three numbers are printed out by the bootstrap program and
+are the sizes of different parts of the system (text, initialized and
+uninitialized data). The system also allocates several system data
+structures after it starts running. The sizes of these structures are
+based on the amount of available memory and the maximum count of active
+users expected, as declared in a system configuration description. This
+will be discussed later.
+.PP
+UNIX itself then runs for the first time and begins by printing out a banner
+identifying the release and
+version of the system that is in use and the date that it was compiled.
+.PP
+Next the
+.I mem
+messages give the
+amount of real (physical) memory and the
+memory available to user programs
+in bytes.
+For example, if your machine has 16 megabytes of memory, then
+\fBxxx\fP will be 16777216.
+.PP
+The messages that come out next show what devices were found on
+the current processor. These messages are described in
+.Xr autoconf (4).
+The distributed system may not have
+found all the communications devices you have
+or all the mass storage peripherals you have, especially
+if you have more than
+two of anything. You will correct this when you create
+a description of your machine from which to configure a site-dependent
+version of UNIX.
+The messages printed at boot here contain much of the information
+that will be used in creating the configuration.
+In a correctly configured system most of the information
+present in the configuration description
+is printed out at boot time as the system verifies that each device
+is present.
+.PP
+The \*(lqroot device?\*(rq prompt was printed by the system
+to ask you for the name of the root filesystem to use.
+This happens because the distribution system is a \fIgeneric\fP
+system, i.e., it can be bootstrapped on a cpu with its root device
+and paging area on any available disk drive.
+You will most likely respond to the root device question with ``sd0''
+if you are booting from a SCSI disk,
+or with ``rd0'' if you are booting from an HP-IB disk.
+This response shows that the disk it is running
+on is drive 0 of type ``sd'' or ``rd'' respectively.
+If you have other disks attached to the system,
+it is possible that the drive you are using will not be configured
+as logical drive 0.
+Check the autoconfiguration messages printed out by the kernel to
+make sure.
+These messages will show the type of every logical drive
+and its associated controller and slave addresses.
+You will later build a system tailored to your configuration
+that will not prompt you for a root device when it is bootstrapped.
+.DS
+\fBroot device?\fP \fI\*(Dk0\fP
+\fBWARNING: preposterous time in filesystem \-\- CHECK AND RESET THE DATE!\fP
+\fBerase ^?, kill ^U, intr ^C\fP
+\fB#\fP
+.DE
+.PP
+The \*(lqerase ...\*(rq message is part of the
+.Pn /.profile
+that was executed by the root shell when it started. This message
+tells you about the settings of the character erase,
+line erase, and interrupt characters.
+.PP
+UNIX is now running,
+and the \fIUNIX Programmer's Manual\fP applies. The ``#'' is the prompt
+from the Bourne shell, and lets you know that you are the super-user,
+whose login name is \*(lqroot\*(rq.
+.PP
+At this point, the root filesystem is mounted read-only.
+Before continuing the installation, the filesystem needs to be ``updated''
+to allow writing, and the device special files for the following steps need
+to be created.
+This is done as follows:
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fImount_mfs -s 1000 -T type /dev/null /tmp\fP (create a writable filesystem)
+(\fItype\fP is the disk type as determined from /etc/disktab)
+\fB#\fP \fIcd /tmp\fP (connect to that directory)
+\fB#\fP \fI../dev/MAKEDEV \*(Dk#\fP (create special files for root disk)
+(\fI\*(Dk\fP is the disk type, \fI#\fP is the unit number)
+(ignore warning from ``sh'')
+\fB#\fP \fImount \-uw /tmp/\*(Dk#a /\fP (read-write mount root filesystem)
+\fB#\fP \fIcd /dev\fP (go to device directory)
+\fB#\fP \fI./MAKEDEV \*(Dk#\fP (create permanent special files for root disk)
+(again, ignore warning from ``sh'')
+.TE
+.DE
+.Sh 4 "Step 4: (optional) restoring the root filesystem"
+.PP
+The root filesystem that you are currently running on is complete;
+however, it probably is not optimally laid out for the disk on
+which you are running.
+If you will be cloning copies of the system onto multiple disks for
+other machines, you are advised to connect one of these disks to
+this machine, and build and restore a properly laid out root filesystem
+onto it.
+If this is the only machine on which you will be running \*(4B
+or peak performance is not an issue, you can skip this step and
+proceed directly to step 5.
+.PP
+Connect a second disk to your machine.
+If you bootstrapped using the two disk method, you can
+overwrite your initial HP-UX disk, as it will no longer
+be needed (assuming you have no plans to run HP-UX again).
+.PP
+To really create the root filesystem on drive 1
+you should first label the disk as described in step 5 below.
+Then run the following commands:
+.DS
+\fB#\fP \fIcd /dev\fP
+\fB#\fP \fI./MAKEDEV \*(Dk1a\fP
+\fB#\fP\|\fInewfs /dev/r\*(Dk1a\fP
+\fB#\fP\|\fImount /dev/\*(Dk1a /mnt\fP
+\fB#\fP\|\fIcd /mnt\fP
+\fB#\fP\|\fIdump 0f \- /dev/r\*(Dk0a | restore xf \-\fP
+(Note: restore will ask if you want to ``set owner/mode for '.'''
+to which you should reply ``yes''.)
+.DE
+.PP
+When this completes,
+you should then shut down the system, and boot on the disk that
+you just created following the procedure in step (3) above.
+.Sh 4 "Step 5: placing labels on the disks"
+.PP
+For each disk on the HP300, \*(4B places information about the geometry
+of the drive and the partition layout at byte offset 1024.
+This information is written with
+.Xr disklabel (8).
+.PP
+The root image just loaded includes a ``generic'' label intended to allow
+easy installation of the root and
+.Pn /usr
+and may not be suitable for the actual
+disk on which it was installed.
+In particular,
+it may make your disk appear smaller or larger than its real size.
+In the former case, you lose some capacity.
+In the latter, some of the partitions may map non-existent sectors
+leading to errors if those partitions are used.
+It is also possible that the defined geometry will interact poorly with
+the filesystem code resulting in reduced performance.
+However, as long as you are willing to give up a little space,
+not use certain partitions or suffer minor performance degradation,
+you might want to avoid this step;
+especially if you do not know how to use
+.Xr ed (1).
+.PP
+If you choose to edit this label,
+you can fill in correct geometry information from
+.Pn /etc/disktab .
+You may also want to rework the ``e'' and ``f'' partitions used for loading
+.Pn /usr
+and
+.Pn /var .
+You should not attempt to, and
+.Xr disklabel
+will not let you, modify the ``a'', ``b'' and ``d'' partitions.
+To edit a label:
+.DS
+\fB#\fP \fIEDITOR=ed\fP
+\fB#\fP \fIexport EDITOR\fP
+\fB#\fP \fIdisklabel -r -e /dev/r\fP\fBXX#\fP\fId\fP
+.DE
+where \fBXX\fP is the type and \fB#\fP is the logical drive number; e.g.
+.Pn /dev/rsd0d
+or
+.Pn /dev/rrd0d .
+Note the explicit use of the ``d'' partition.
+This partition includes the bootblock as does ``c''
+and using it allows you to change the size of ``c''.
+.PP
+If you wish to label any additional disks, run the following command for each:
+.DS
+\fB#\|\fP\fIdisklabel -rw \fP\fBXX# type\fP \fI"optional_pack_name"\fP
+.DE
+where \fBXX#\fP is the same as in the previous command
+and \fBtype\fP is the HP300 disk device name as listed in
+.Pn /etc/disktab .
+The optional information may contain any descriptive name for the
+contents of a disk, and may be up to 16 characters long. This procedure
+will place the label on the disk using the information found in
+.Pn /etc/disktab
+for the disk type named.
+If you have changed the disk partition sizes,
+you may wish to add entries for the modified configuration in
+.Pn /etc/disktab
+before labeling the affected disks.
+.PP
+You have now completed the HP300 specific part of the installation.
+Now proceed to the generic part of the installation
+described starting in section 2.5 below.
+Note that where the disk name ``sd'' is used throughout section 2.5,
+you should substitute the name ``rd'' if you are running on an HP-IB disk.
+Also, if you are loading on a single disk with the default disklabel,
+.Pn /var
+should be restored to the ``f'' partition and
+.Pn /usr
+to the ``e'' partition.
+.Sh 2 "Booting the SPARC"
+.Sh 3 "Supported hardware"
+.LP
+The hardware supported by \*(4B for the SPARC is as follows:
+.TS
+center box;
+lw(1i) lw(4i).
+CPU's T{
+SPARCstation 1 series (1, 1+, SLC, IPC) and
+SPARCstation 2 series (2, IPX).
+T}
+_
+DISK's T{
+SCSI.
+T}
+_
+TAPE's T{
+none.
+T}
+_
+NETWORK T{
+SPARCstation Lance (le).
+T}
+_
+GRAPHICS T{
+bwtwo and cgthree.
+T}
+_
+INPUT T{
+Keyboard and mouse.
+T}
+_
+MISC T{
+Battery-backed real time clock,
+built-in serial devices,
+Sbus SCSI controller,
+and audio device.
+T}
+.TE
+.LP
+Major items that are not supported include
+anything VME-based,
+the GX (cgsix) display,
+the floppy disk, and SCSI tapes.
+.Sh 3 "Limitations"
+.LP
+There are several important limitations on the \*(4B distribution
+for the SPARC:
+.IP 1)
+You
+.B must
+have SunOS 4.1.x or Solaris to bring up \*(4B.
+There is no SPARCstation bootstrap code in this distribution. The
+Sun-supplied boot loader will be used to boot \*(4B; you must copy
+this from your SunOS distribution. This imposes several
+restrictions on the system, as detailed below.
+.IP 2)
+The \*(4B SPARC kernel does not remap SCSI IDs. A SCSI disk at
+target 0 will become ``sd0'', where in SunOS the same disk will
+normally be called ``sd3''. If your existing SunOS system is
+diskful, it will be least painful to have SunOS running on the disk
+on target 3 lun 0 and put \*(4B on the disk on target 0 lun 0. Both
+systems will then think they are running on ``sd0'', and you can
+boot either system as needed simply by changing the EEPROM's boot
+device.
+.IP 3)
+There is no SCSI tape driver.
+You must have another system for tape reading and backups.
+.IP 4)
+Although the \*(4B SPARC kernel will handle existing SunOS shared
+libraries, it does not use or create them itself, and therefore
+requires much more disk space than SunOS does.
+.IP 5)
+It is currently difficult (though not completely impossible) to
+run \*(4B diskless. These instructions assume you will have a local
+boot, swap, and root filesystem.
+.IP 6)
+When using a serial port rather than a graphics display as the console,
+only port
+.Pn ttya
+can be used.
+Attempts to use port
+.Pn ttyb
+will fail when the kernel tries
+to print the boot up messages to the console.
+.Sh 3 "The procedure"
+.PP
+You must have a spare disk on which to place \*(4B.
+The steps involved in bootstrapping this tape are as follows:
+.IP 1)
+Bring up SunOS (preferably SunOS 4.1.x or Solaris 1.x, although
+Solaris 2 may work \(em this is untested).
+.IP 2)
+Attach auxiliary SCSI disk(s). Format and label using the
+SunOS formatting and labeling programs as needed.
+Note that the root filesystem currently requires at least 10 MB; 16 MB
+or more is recommended. The b partition will be used for swap;
+this should be at least 32 MB.
+.IP 3)
+Use the SunOS
+.Xr newfs
+to build the root filesystem. You may also
+want to build other filesystems at the same time. (By default, the
+\*(4B
+.Xr newfs
+builds a filesystem that SunOS will not handle; if you
+plan to switch OSes back and forth you may want to sacrifice the
+performance gain from the new filesystem format for compatibility.)
+You can build an old-format filesystem on \*(4B by giving the \-O
+option to
+.Xr newfs (8).
+.Xr Fsck (8)
+can convert old format filesystems to new format
+filesystems, but not vice versa,
+so you may want to initially build old format filesystems so that they
+can be mounted under SunOS,
+and then later convert them to new format filesystems when you are
+satisfied that \*(4B is running properly.
+In any case,
+.B
+you must build an old-style root filesystem
+.R
+so that the SunOS boot program will work.
+.IP 4)
+Mount the new root, then copy the SunOS
+.Pn /boot
+into place and use the SunOS ``installboot'' program
+to enable disk-based booting.
+Note that the filesystem must be mounted when you do the ``installboot'':
+.DS
+.ft CW
+# mount /dev/sd3a /mnt
+# cp /boot /mnt/boot
+# cd /usr/kvm/mdec
+# installboot /mnt/boot bootsd /dev/rsd3a
+.DE
+The SunOS
+.Pn /boot
+will load \*(4B kernels; there is no SPARCstation
+bootstrap code on the distribution. Note that the SunOS
+.Pn /boot
+does not handle the new \*(4B filesystem format.
+.IP 5)
+Restore the contents of the \*(4B root filesystem.
+.DS
+.ft CW
+# cd /mnt
+# rrestore xf tapehost:/dev/nrst0
+.DE
+.IP 6)
+Boot the supplied kernel:
+.DS
+.ft CW
+# halt
+ok boot sd(0,3)vmunix -s [for old proms] OR
+ok boot disk3 -s [for new proms]
+\&... [\*(4B boot messages]
+.DE
+.LP
+To install the remaining filesystems, use the procedure described
+starting in section 2.5.
+In these instructions,
+.Pn /usr
+should be loaded into the ``e'' partition and
+.Pn /var
+in the ``f'' partition.
+.LP
+After completing the filesystem installation you may want
+to set up \*(4B to reboot automatically:
+.DS
+.ft CW
+# halt
+ok setenv boot-from sd(0,3)vmunix [for old proms] OR
+ok setenv boot-device disk3 [for new proms]
+.DE
+If you build backwards-compatible filesystems, either with the SunOS
+newfs or with the \*(4B ``\-O'' option, you can mount these under
+SunOS. The SunOS fsck will, however, always think that these filesystems
+are corrupted, as there are several new (previously unused)
+superblock fields that are updated in \*(4B. Running ``fsck \-b32''
+and letting it ``fix'' the superblock will take care of this.
+.sp 0.5
+If you wish to run SunOS binaries that use SunOS shared libraries, you
+simply need to copy all the dynamic linker files from an existing
+SunOS system:
+.DS
+.ft CW
+# rcp sunos-host:/etc/ld.so.cache /etc/
+# rcp sunos-host:'/usr/lib/*.so*' /usr/lib/
+.DE
+The SunOS compiler and linker should be able to produce SunOS binaries
+under \*(4B, but this has not been tested. If you plan to try it you
+will need the appropriate .sa files as well.
+.Sh 2 "Booting the DECstation"
+.Sh 3 "Supported hardware"
+.LP
+The hardware supported by \*(4B for the DECstation is as follows:
+.TS
+center box;
+lw(1i) lw(4i).
+CPU's T{
+R2000 based (3100) and
+R3000 based (5000/200, 5000/20, 5000/25, 5000/1xx).
+T}
+_
+DISK's T{
+SCSI-I (tested RZ23, RZ55, RZ57, Maxtor 8760S).
+T}
+_
+TAPE's T{
+SCSI-I (tested DEC TK50, Archive DAT, Emulex MT02).
+T}
+_
+RS232 T{
+Internal DEC dc7085 and AMD 8530 based interfaces.
+T}
+_
+NETWORK T{
+TURBOchannel PMAD-AA and internal LANCE based interfaces.
+T}
+_
+GRAPHICS T{
+Terminal emulation and raw frame buffer support for
+3100 (color & monochrome),
+TURBOchannel PMAG-AA, PMAG-BA, PMAG-DV.
+T}
+_
+INPUT T{
+Standard DEC keyboard (LK201) and mouse.
+T}
+_
+MISC T{
+Battery-backed real time clock,
+internal and TURBOchannel PMAZ-AA SCSI interfaces.
+T}
+.TE
+.LP
+Major items that are not supported include the 5000/240
+(the code exists but has not been compiled in or tested),
+R4000 based machines, FDDI and audio interfaces.
+Diskless machines are not supported but booting kernels and bootstrapping
+over the network is supported on the 5000 series.
+.Sh 3 "The procedure"
+.PP
+The first file on the distribution tape is a tar file that contains
+four files.
+The first step requires a running UNIX (or ULTRIX) system that can
+be used to extract the tar archive from the first file on the tape.
+The command:
+.DS
+.ft CW
+tar xf /dev/rmt0
+.DE
+will extract the following four files:
+.DS
+A) root.image: \fIdd\fP image of the root filesystem
+B) vmunix.tape: \fIdd\fP image for creating boot tapes
+C) vmunix.net: file for booting over the network
+D) root.dump: \fIdump\fP image of the root filesystem
+.DE
+There are three basic ways a system can be bootstrapped corresponding to the
+first three files.
+You may want to read the section on bootstrapping the HP300
+since many of the steps are similar.
+A spare, formatted SCSI disk is also useful.
+.Sh 4 "Procedure A: copy root filesystem to disk"
+.PP
+This procedure is similar to that for the HP300.
+If you have an extra disk, the easiest approach is to use \fIdd\fP\|(1)
+under ULTRIX to copy the root filesystem image to the beginning
+of the spare disk.
+The root filesystem image includes a disklabel and bootblock along with the
+root filesystem.
+An example command to copy the image to the beginning of a disk is:
+.DS
+.ft CW
+dd if=root.image of=/dev/rz1c bs=\*(Bzb
+.DE
+The actual special file syntax will vary depending on unit numbers and
+the version of ULTRIX that is running.
+This system is now ready to boot. You can boot the kernel with one of the
+following PROM commands. If you are booting on a 3100, the disk must be SCSI
+id zero because of a bug.
+.DS
+.ft CW
+DEC 3100: boot \-f rz(0,0,0)vmunix
+DEC 5000: boot 5/rz0/vmunix
+.DE
+You can then proceed to section 2.5
+to create reasonable disk partitions for your machine
+and then install the rest of the system.
+.Sh 4 "Procedure B: bootstrap from tape"
+.PP
+If you have only a single machine with a single disk,
+you need to use the more difficult approach of booting a
+kernel and mini-root from tape or the network, and using it to restore
+the root filesystem.
+.PP
+First, you will need to create a boot tape. This can be done using
+\fIdd\fP as in the following example.
+.DS
+.ft CW
+dd if=vmunix.tape of=/dev/nrmt0 bs=1b
+dd if=root.dump of=/dev/nrmt0 bs=\*(Bzb
+.DE
+The actual special file syntax for the tape drive will vary depending on
+unit numbers, tape device and the version of ULTRIX that is running.
+.PP
+The first file on the boot tape contains a boot header, kernel, and
+mini-root filesystem that the PROM can copy into memory.
+Installing from tape has only been tested
+on a 3100 and a 5000/200 using a TK50 tape drive. Here are two example
+PROM commands to boot from tape.
+.DS
+.ft CW
+DEC 3100: boot \-f tz(0,5,0) m # 5 is the SCSI id of the TK50
+DEC 5000: boot 5/tz6 m # 6 is the SCSI id of the TK50
+.DE
+The `m' argument tells the kernel to look for a root filesystem in memory.
+Next you should proceed to section 2.4.3 to build a disk-based root filesystem.
+.Sh 4 "Procedure C: bootstrap over the network"
+.PP
+You will need a host machine that is running the \fIbootp\fP server
+with the
+.Pn vmunix.net
+file installed in the default directory defined by the
+configuration file for
+.Xr bootp .
+Here are two example PROM commands to boot across the net:
+.DS
+.ft CW
+DEC 3100: boot \-f tftp()vmunix.net m
+DEC 5000: boot 6/tftp/vmunix.net m
+.DE
+This command should load the kernel and mini-root into memory and
+run the same as the tape install (procedure B).
+The rest of the steps are the same except
+you will need to start the network
+(if you are unsure how to fill in the <name> fields below,
+see sections 4.4 and 5).
+Execute the following to start the networking:
+.DS
+.ft CW
+# mount \-uw /
+# echo 127.0.0.1 localhost >> /etc/hosts
+# echo <your.host.inet.number> myname.my.domain myname >> /etc/hosts
+# echo <friend.host.inet.number> myfriend.my.domain myfriend >> /etc/hosts
+# ifconfig le0 inet myname
+.DE
+Next you should proceed to section 2.4.3 to build a disk-based root filesystem.
+.Sh 3 "Label disk and create the root filesystem"
+.LP
+There are four steps to create a disk-based root filesystem.
+.IP 1)
+Label the disk.
+.DS
+.ft CW
+# disklabel -W /dev/rrz?c # This enables writing the label
+# disklabel -w -r -B /dev/rrz?c $DISKTYPE
+# newfs /dev/rrz?a
+\&...
+# fsck /dev/rrz?a
+\&...
+.DE
+Supported disk types are listed in
+.Pn /etc/disktab .
+.IP 2)
+Restore the root filesystem.
+.DS
+.ft CW
+# mount \-uw /
+# mount /dev/rz?a /a
+# cd /a
+.DE
+.ti +0.4i
+If you are restoring locally (procedure B), run:
+.DS
+.ft CW
+# mt \-f /dev/nrmt0 rew
+# restore \-xsf 2 /dev/rmt0
+.DE
+.ti +0.4i
+If you are restoring across the net (procedure C), run:
+.DS
+.ft CW
+# rrestore xf myfriend:/path/to/root.dump
+.DE
+.ti +0.4i
+When the restore finishes, clean up with:
+.DS
+.ft CW
+# cd /
+# sync
+# umount /a
+# fsck /dev/rz?a
+.DE
+.IP 3)
+Reset the system and initialize the PROM monitor to boot automatically.
+.DS
+.ft CW
+DEC 3100: setenv bootpath boot \-f rz(0,?,0)vmunix
+DEC 5000: setenv bootpath 5/rz?/vmunix -a
+.DE
+.IP 4)
+After booting UNIX, you will need to create
+.Pn /dev/mouse
+to run X windows as in the following example.
+.DS
+.ft CW
+rm /dev/mouse
+ln /dev/xx /dev/mouse
+.DE
+The 'xx' should be one of the following:
+.DS
+pm0 raw interface to PMAX graphics devices
+cfb0 raw interface to TURBOchannel PMAG-BA color frame buffer
+xcfb0 raw interface to maxine graphics devices
+mfb0 raw interface to mono graphics devices
+.DE
+You can then proceed to section 2.5 to install the rest of the system.
+Note that where the disk name ``sd'' is used throughout section 2.5,
+you should substitute the name ``rz''.
+.Sh 2 "Disk configuration"
+.PP
+All architectures now have a root filesystem up and running and
+proceed from this point to lay out filesystems to make use
+of the available space and to balance disk load for better system
+performance.
+.Sh 3 "Disk naming and divisions"
+.PP
+Each physical disk drive can be divided into up to 8 partitions;
+UNIX typically uses only 3 or 4 partitions.
+For instance, the first partition, \*(Dk0a,
+is used for a root filesystem, a backup thereof,
+or a small filesystem like
+.Pn /var/tmp ;
+the second partition, \*(Dk0b,
+is used for paging and swapping; and
+a third partition, typically \*(Dk0e,
+holds a user filesystem.
+.PP
+The space available on a disk varies per device.
+Each disk typically has a paging area of 30 to 100 megabytes
+and a root filesystem of about 17 megabytes.
+.\" XXX check
+The distributed system binaries occupy about 150 (180 with X11R5) megabytes
+.\" XXX check
+while the major sources occupy another 250 (340 with X11R5) megabytes.
+The
+.Pn /var
+filesystem as delivered on the tape is only 2Mb;
+however, it should have at least 50Mb allocated to it just for
+normal system activity.
+Usually it is allocated the last partition on the disk
+so that it can provide as much space as possible to the
+.Pn /var/users
+filesystem.
+See section 2.5.4 for further details on disk layouts.
+.PP
+Be aware that the disks have their sizes
+measured in disk sectors (usually 512 bytes), while the UNIX filesystem
+blocks are variable sized.
+If
+.Sm BLOCKSIZE=1k
+is set in the user's environment, all user programs report
+disk space in kilobytes; otherwise,
+disk sizes are always reported in units of 512-byte sectors\**.
+.FS
+You can thank System V intransigence and POSIX duplicity for
+requiring that 512-byte blocks be the units that programs report.
+.FE
+The
+.Pn /etc/disktab
+file used in labelling disks and making filesystems
+specifies disk partition sizes in sectors.
+.Sh 3 "Layout considerations"
+.PP
+There are several considerations in deciding how
+to adjust the arrangement of things on your disks.
+The most important is making sure that there is adequate space
+for what is required; secondarily, throughput should be maximized.
+Paging space is an important parameter.
+The system, as distributed, sizes the configured
+paging areas each time the system is booted. Further,
+multiple paging areas of different sizes may be interleaved.
+.PP
+Many common system programs (C, the editor, the assembler etc.)
+create intermediate files in the
+.Pn /tmp
+directory, so the filesystem where this is stored also should be made
+large enough to accommodate most high-water marks.
+Typically,
+.Pn /tmp
+is constructed from a memory-based filesystem (see
+.Xr mount_mfs (8)).
+Programs that want their temporary files to persist
+across system reboots (such as editors) should use
+.Pn /var/tmp .
+If you plan to use a disk-based
+.Pn /tmp
+filesystem to avoid loss across system reboots, it makes
+sense to mount this in a ``root'' (i.e. first partition)
+filesystem on another disk.
+All the programs that create files in
+.Pn /tmp
+take care to delete them, but are not immune to rare events
+and can leave dregs.
+The directory should be examined every so often and the old
+files deleted.
+.PP
+The efficiency with which UNIX is able to use the CPU
+is often strongly affected by the configuration of disk controllers;
+it is critical for good performance to balance disk load.
+There are at least five components of the disk load that you can
+divide between the available disks:
+.IP 1)
+The root filesystem.
+.IP 2)
+The
+.Pn /var
+and
+.Pn /var/tmp
+filesystems.
+.IP 3)
+The
+.Pn /usr
+filesystem.
+.IP 4)
+The user filesystems.
+.IP 5)
+The paging activity.
+.LP
+The following possibilities are ones we have used at times
+when we had 2, 3 and 4 disks:
+.TS
+center doublebox;
+l | c s s
+l | lw(5) | lw(5) | lw(5).
+ disks
+what 2 3 4
+_
+root 0 0 0
+var 1 2 3
+usr 1 1 1
+paging 0+1 0+2 0+2+3
+users 0 0+2 0+2
+archive x x 3
+.TE
+.PP
+The most important things to consider are to
+even out the disk load as much as possible, and to do this by
+decoupling filesystems (on separate arms) between which heavy copying occurs.
+Note that a long term average balanced load is not important; it is
+much more important to have an instantaneously balanced
+load when the system is busy.
+.PP
+Intelligent experimentation with a few filesystem arrangements can
+pay off in much improved performance. It is particularly easy to
+move the root, the
+.Pn /var
+and
+.Pn /var/tmp
+filesystems and the paging areas. Place the
+user files and the
+.Pn /usr
+directory as space needs dictate and experiment
+with the other, more easily moved filesystems.
+.Sh 3 "Filesystem parameters"
+.PP
+Each filesystem is parameterized according to its block size,
+fragment size, and the disk geometry characteristics of the
+medium on which it resides. Inaccurate specification of the disk
+characteristics or haphazard choice of the filesystem parameters
+can result in substantial throughput degradation or significant
+waste of disk space. As distributed,
+filesystems are configured according to the following table.
+.DS
+.TS
+center;
+l l l.
+Filesystem Block size Fragment size
+_
+root 8 kbytes 1 kbytes
+usr 8 kbytes 1 kbytes
+users 4 kbytes 512 bytes
+.TE
+.DE
+.PP
+The root filesystem block size is
+made large to optimize bandwidth to the associated disk.
+The large block size is important as many of the most
+heavily used programs are demand paged out of the
+.Pn /bin
+directory.
+The fragment size of 1 kbyte is a ``nominal'' value to use
+with a filesystem. With a 1 kbyte fragment size
+disk space utilization is about the same
+as with the earlier versions of the filesystem.
+.PP
+The filesystems for users have a 4 kbyte block
+size with 512 byte fragment size. These parameters
+have been selected based on observations of the
+performance of our user filesystems. The 4 kbyte
+block size provides adequate bandwidth while the
+512 byte fragment size provides acceptable space compaction
+and disk fragmentation.
+.PP
+Other parameters may be chosen in constructing filesystems,
+but the factors involved in choosing a block
+size and fragment size are many and interact in complex
+ways. Larger block sizes result in better
+throughput to large files in the filesystem as
+larger I/O requests will then be done by the
+system. However,
+consideration must be given to the average file sizes
+found in the filesystem and the performance of the
+internal system buffer cache. The system
+currently provides space in the inode for
+12 direct block pointers, 1 single indirect block
+pointer, 1 double indirect block pointer,
+and 1 triple indirect block pointer.
+If a file uses only direct blocks, access time to
+it will be optimized by maximizing the block size.
+If a file spills over into an indirect block,
+increasing the block size of the filesystem may
+decrease the amount of space used
+by eliminating the need to allocate an indirect block.
+However, if the block size is increased and an indirect
+block is still required, then more disk space will be
+used by the file because indirect blocks are allocated
+according to the block size of the filesystem.
+.PP
+In selecting a fragment size for a filesystem, at least
+two considerations should be given. The major performance
+tradeoffs observed are between an 8 kbyte block filesystem
+and a 4 kbyte block filesystem. Because of implementation
+constraints, the block size versus fragment size ratio can not
+be greater than 8. This means that an 8 kbyte filesystem
+will always have a fragment size of at least 1 kbytes. If
+a filesystem is created with a 4 kbyte block size and a
+1 kbyte fragment size, then upgraded to an 8 kbyte block size
+and 1 kbyte fragment size, identical space compaction will be
+observed. However, if a filesystem has a 4 kbyte block size
+and 512 byte fragment size, converting it to an 8K/1K
+filesystem will result in 4-8% more space being
+used. This implies that 4 kbyte block filesystems that
+might be upgraded to 8 kbyte blocks for higher performance should
+use fragment sizes of at least 1 kbytes to minimize the amount
+of work required in conversion.
+.PP
+A second, more important, consideration when selecting the
+fragment size for a filesystem is the level of fragmentation
+on the disk. With an 8:1 fragment to block ratio, storage fragmentation
+occurs much sooner, particularly with a busy filesystem running
+near full capacity. By comparison, the level of fragmentation in a
+4:1 fragment to block ratio filesystem is one tenth as severe. This
+means that on filesystems where many files are created and
+deleted, the 512 byte fragment size is more likely to result in apparent
+space exhaustion because of fragmentation. That is, when the filesystem
+is nearly full, file expansion that requires locating a
+contiguous area of disk space is more likely to fail on a 512
+byte filesystem than on a 1 kbyte filesystem. To minimize
+fragmentation problems of this sort, a parameter in the super
+block specifies a minimum acceptable free space threshold. When
+normal users (i.e. anyone but the super-user) attempt to allocate
+disk space and the free space threshold is exceeded, the user is
+returned an error as if the filesystem were really full. This
+parameter is nominally set to 5%; it may be changed by supplying
+a parameter to
+.Xr newfs (8),
+or by updating the super block of an existing filesystem using
+.Xr tunefs (8).
+.PP
+Finally, a third, less common consideration is the attributes of
+the disk itself. The fragment size should not be smaller than the
+physical sector size of the disk. As an example, the HP magneto-optical
+disks have 1024 byte physical sectors. Using a 512 byte fragment size
+on such disks will work but is extremely inefficient.
+.PP
+Note that the above discussion considers block sizes of up to only 8k.
+As of the 4.4 release, the maximum block size has been increased to 64k.
+This allows an entirely new set of block/fragment combinations for which
+there is little experience to date.
+In general though, unless a filesystem is to be used
+for a special purpose application (for example, storing
+image processing data), we recommend using the
+values supplied above.
+Remember that the current
+implementation limits the block size to at most 64 kbytes
+and the ratio of block size versus fragment size must be 1, 2, 4, or 8.
+.PP
+The disk geometry information used by the filesystem
+affects the block layout policies employed. The file
+.Pn /etc/disktab ,
+as supplied, contains the data for almost
+all drives supported by the system. Before constructing
+a filesystem with
+.Xr newfs (8)
+you should label the disk (if it has not yet been labeled,
+and the driver supports labels).
+If labels cannot be used, you must instead
+specify the type of disk on which the filesystem resides;
+.Xr newfs
+then reads
+.Pn /etc/disktab
+instead of the pack label.
+This file also contains the default
+filesystem partition
+sizes, and default block and fragment sizes. To
+override any of the default values you can modify the file,
+edit the disk label,
+or use an option to
+.Xr newfs .
+.Sh 3 "Implementing a layout"
+.PP
+To put a chosen disk layout into effect, you should use the
+.Xr newfs (8)
+command to create each new filesystem.
+Each filesystem must also be added to the file
+.Pn /etc/fstab
+so that it will be checked and mounted when the system is bootstrapped.
+.PP
+First we will consider a system with a single disk.
+There is little real choice on how to do the layout;
+the root filesystem goes in the ``a'' partition,
+.Pn /usr
+goes in the ``e'' partition, and
+.Pn /var
+fills out the remainder of the disk in the ``f'' partition.
+This is the organization used if you loaded the disk-image root filesystem.
+With the addition of a memory-based
+.Pn /tmp
+filesystem, its fstab entry would be as follows:
+.TS
+center;
+lfC lfC l l n n.
+/dev/\*(Dk0a / ufs rw 1 1
+/dev/\*(Dk0b none swap sw 0 0
+/dev/\*(Dk0b /tmp mfs rw,-s=14000,-b=8192,-f=1024,-T=sd660 0 0
+/dev/\*(Dk0e /usr ufs ro 1 2
+/dev/\*(Dk0f /var ufs rw 1 2
+.TE
+.PP
+If we had a second disk, we would split the load between the drives.
+On the second disk, we place the
+.Pn /usr
+and
+.Pn /var
+filesystems in their usual \*(Dk1e and \*(Dk1f
+partitions respectively.
+The \*(Dk1b partition would be used as a second paging area,
+and the \*(Dk1a partition left as a spare root filesystem
+(alternatively \*(Dk1a could be used for
+.Pn /var/tmp ).
+The first disk still holds
+the root filesystem in \*(Dk0a, and the primary swap area in \*(Dk0b.
+The \*(Dk0e partition is used to hold home directories in
+.Pn /var/users .
+The \*(Dk0f partition can be used for
+.Pn /usr/src
+or alternately the \*(Dk0e partition can be extended to cover
+the rest of the disk with
+.Xr disklabel (8).
+As before, the
+.Pn /tmp
+directory is a memory-based filesystem.
+Note that to interleave the paging between the two disks
+you must build a system configuration that specifies:
+.DS
+config vmunix root on \*(Dk0 swap on \*(Dk0 and \*(Dk1
+.DE
+The
+.Pn /etc/fstab
+file would then contain
+.TS
+center;
+lfC lfC l l n n.
+/dev/\*(Dk0a / ufs rw 1 1
+/dev/\*(Dk0b none swap sw 0 0
+/dev/\*(Dk1b none swap sw 0 0
+/dev/\*(Dk0b /tmp mfs rw,-s=14000,-b=8192,-f=1024,-T=sd660 0 0
+/dev/\*(Dk1e /usr ufs ro 1 2
+/dev/\*(Dk0f /usr/src ufs rw 1 2
+/dev/\*(Dk1f /var ufs rw 1 2
+/dev/\*(Dk0e /var/users ufs rw 1 2
+.TE
+.PP
+To make the
+.Pn /var
+filesystem we would do:
+.DS
+\fB#\fP \fIcd /dev\fP
+\fB#\fP \fIMAKEDEV \*(Dk1\fP
+\fB#\fP \fIdisklabel -wr \*(Dk1 "disk type" "disk name"\fP
+\fB#\fP \fInewfs \*(Dk1f\fP
+(information about filesystem prints out)
+\fB#\fP \fImkdir /var\fP
+\fB#\fP \fImount /dev/\*(Dk1f /var\fP
+.DE
+.Sh 2 "Installing the rest of the system"
+.PP
+At this point you should have your disks partitioned.
+The next step is to extract the rest of the data from the tape.
+At a minimum you need to set up the
+.Pn /var
+and
+.Pn /usr
+filesystems.
+You may also want to extract some or all the program sources.
+Since some architectures do not support tape drives, or do not support the
+correct ones, you may need to extract the files indirectly using
+.Xr rsh (1).
+For example, for a directly connected tape drive you might do:
+.DS
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP
+\fB#\fP \fItar xbpf \*(Bz /dev/nr\*(Mt0\fP
+.DE
+The equivalent indirect procedure (where the tape drive is on machine ``foo'')
+is:
+.DS
+\fB#\fP \fIrsh foo mt -f /dev/nr\*(Mt0 fsf\fP
+\fB#\fP \fIrsh foo dd if=/dev/nr\*(Mt0 bs=\*(Bzb | tar xbpf \*(Bz -\fP
+.DE
+Obviously, the target machine must be connected to the local network
+for this to work.
+To do this:
+.DS
+\fB#\fP \fIecho 127.0.0.1 localhost >> /etc/hosts\fP
+\fB#\fP \fIecho \fPyour.host.inet.number myname.my.domain myname\fI >> /etc/hosts\fP
+\fB#\fP \fIecho \fPfriend.host.inet.number myfriend.my.domain myfriend\fI >> /etc/hosts\fP
+\fB#\fP \fIifconfig le0 inet \fPmyname
+.DE
+where the ``host.inet.number'' fields are the IP addresses for your host and
+the host with the tape drive
+and the ``my.domain'' fields are the names of your machine and the tape-hosting
+machine.
+See sections 4.4 and 5 for more information on setting up the network.
+.PP
+Assuming a directly connected tape drive, here is how to extract and
+install
+.Pn /var
+and
+.Pn /usr :
+.br
+.ne 5
+.TS
+lw(2i) l.
+\fB#\fP \fImount \-uw /dev/\*(Dk#a /\fP (read-write mount root filesystem)
+\fB#\fP \fIdate yymmddhhmm\fP (set date, see \fIdate\fP\|(1))
+\&....
+\fB#\fP \fIpasswd -l root\fP (set password for super-user)
+\fBNew password:\fP (password will not echo)
+\fBRetype new password:\fP
+\fB#\fP \fIpasswd -l toor\fP (set password for super-user)
+\fBNew password:\fP (password will not echo)
+\fBRetype new password:\fP
+\fB#\fP \fIhostname mysitename\fP (set your hostname)
+\fB#\fP \fInewfs r\*(Dk#p\fP (create empty user filesystem)
+(\fI\*(Dk\fP is the disk type, \fI#\fP is the unit number,
+\fIp\fP is the partition; this takes a few minutes)
+\fB#\fP \fImount /dev/\*(Dk#p /var\fP (mount the var filesystem)
+\fB#\fP \fIcd /var\fP (make /var the current directory)
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+\fB#\fP \fItar xbpf \*(Bz /dev/nr\*(Mt0\fP (extract all of var)
+(this takes a few minutes)
+\fB#\fP \fInewfs r\*(Dk#p\fP (create empty user filesystem)
+(as before \fI\*(Dk\fP is the disk type, \fI#\fP is the unit number,
+\fIp\fP is the partition)
+\fB#\fP \fImount /dev/\*(Dk#p /mnt\fP (mount the new /usr in temporary location)
+\fB#\fP \fIcd /mnt\fP (make /mnt the current directory)
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+\fB#\fP \fItar xbpf \*(Bz /dev/nr\*(Mt0\fP (extract all of usr except usr/src)
+(this takes about 15-20 minutes)
+\fB#\fP \fIcd /\fP (make / the current directory)
+\fB#\fP \fIumount /mnt\fP (unmount from temporary mount point)
+\fB#\fP \fIrm -r /usr/*\fP (remove excess bootstrap binaries)
+\fB#\fP \fImount /dev/\*(Dk#p /usr\fP (remount /usr)
+.TE
+If no disk label has been installed on the disk, the
+.Xr newfs
+command will require a third argument to specify the disk type,
+using one of the names in
+.Pn /etc/disktab .
+If the tape had been rewound or positioned incorrectly before the
+.Xr tar
+to extract
+.Pn /var ,
+it may be repositioned by the following commands.
+.DS
+\fB#\fP \fImt -f /dev/nr\*(Mt0 rew\fP
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf 1\fP
+.DE
+The data on the second and third tape files has now been extracted.
+If you are using 6250bpi tapes, the first reel of the
+distribution is no longer needed; you should now mount the second
+reel instead. The installation procedure continues from this
+point on the 8mm tape.
+The next step is to extract the sources.
+As previously noted,
+.Pn /usr/src
+.\" XXX Check
+requires about 250-340Mb of space.
+Ideally sources should be in a separate filesystem;
+if you plan to put them into your
+.Pn /usr
+filesystem, it will need at least 500Mb of space.
+Assuming that you will be using a separate filesystem on \*(Dk0f for
+.Pn /usr/src ,
+you will start by creating and mounting it:
+.DS
+\fB#\fP \fInewfs \*(Dk0f\fP
+(information about filesystem prints out)
+\fB#\fP \fImkdir /usr/src\fP
+\fB#\fP \fImount /dev/\*(Dk0f /usr/src\fP
+.DE
+.LP
+First you will extract the kernel source:
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fIcd /usr/src\fP
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+(this should only be done on Exabyte distributions)
+\fB#\fP \fItar xpbf \*(Bz /dev/nr\*(Mt0\fP (extract the kernel sources)
+(this takes about 15-30 minutes)
+.TE
+.DE
+.LP
+The next tar file contains the sources for the utilities.
+It is extracted as follows:
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fIcd /usr/src\fP
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+\fB#\fP \fItar xpbf \*(Bz /dev/rmt12\fP (extract the utility source)
+(this takes about 30-60 minutes)
+.TE
+.DE
+.PP
+If you are using 6250bpi tapes, the second reel of the
+distribution is no longer needed; you should now mount the third
+reel instead. The installation procedure continues from this
+point on the 8mm tape.
+.PP
+The next tar file contains the sources for the contributed software.
+It is extracted as follows:
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fIcd /usr/src\fP
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+(this should only be done on Exabyte distributions)
+\fB#\fP \fItar xpbf \*(Bz /dev/rmt12\fP (extract the contributed software source)
+(this takes about 30-60 minutes)
+.TE
+.DE
+.PP
+If you received a distribution on 8mm Exabyte tape,
+there is one additional tape file on the distribution tape
+that has not been installed to this point; it contains the
+sources for X11R5 in
+.Xr tar (1)
+format. As distributed, X11R5 should be placed in
+.Pn /usr/src/X11R5 .
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fIcd /usr/src\fP
+\fB#\fP \fImt -f /dev/nr\*(Mt0 fsf\fP (space to end of previous tape file)
+\fB#\fP \fItar xpbf \*(Bz /dev/nr\*(Mt0\fP (extract the X11R5 source)
+(this takes about 30-60 minutes)
+.TE
+.DE
+Many of the X11 utilities search using the path
+.Pn /usr/X11 ,
+so be sure that you have a symbolic link that points at
+the location of your X11 binaries (here, X11R5).
+.PP
+Having now completed the extraction of the sources,
+you may want to verify that your
+.Pn /usr/src
+filesystem is consistent.
+To do so, you must unmount it, and run
+.Xr fsck (8);
+assuming that you used \*(Dk0f you would proceed as follows:
+.DS
+.TS
+lw(2i) l.
+\fB#\fP \fIcd /\fP (change directory, back to the root)
+\fB#\fP \fIumount /usr/src\fP (unmount /usr/src)
+\fB#\fP \fIfsck /dev/r\*(Dk0f\fP
+.TE
+.DE
+The output from
+.Xr fsck
+should look something like:
+.DS
+.B
+** /dev/r\*(Dk0f
+** Last Mounted on /usr/src
+** Phase 1 - Check Blocks and Sizes
+** Phase 2 - Check Pathnames
+** Phase 3 - Check Connectivity
+** Phase 4 - Check Reference Counts
+** Phase 5 - Check Cyl groups
+23000 files, 261000 used, 39000 free (2200 frags, 4600 blocks)
+.R
+.DE
+.PP
+If there are inconsistencies in the filesystem, you may be prompted
+to apply corrective action; see the
+.Xr fsck (8)
+or \fIFsck \(en The UNIX File System Check Program\fP (SMM:3) for more details.
+.PP
+To use the
+.Pn /usr/src
+filesystem, you should now remount it with:
+.DS
+\fB#\fP \fImount /dev/\*(Dk0f /usr/src\fP
+.DE
+or if you have made an entry for it in
+.Pn /etc/fstab
+you can remount it with:
+.DS
+\fB#\fP \fImount /usr/src\fP
+.DE
+.Sh 2 "Additional conversion information"
+.PP
+After setting up the new \*(4B filesystems, you may restore the user
+files that were saved on tape before beginning the conversion.
+Note that the \*(4B
+.Xr restore
+program does its work on a mounted filesystem using normal system operations.
+This means that filesystem dumps may be restored even
+if the characteristics of the filesystem changed.
+To restore a dump tape for, say, the
+.Pn /a
+filesystem something like the following would be used:
+.DS
+\fB#\fP \fImkdir /a\fP
+\fB#\fP \fInewfs \*(Dk#p\fP
+\fB#\fP \fImount /dev/\*(Dk#p /a\fP
+\fB#\fP \fIcd /a\fP
+\fB#\fP \fIrestore x\fP
+.DE
+.PP
+If
+.Xr tar
+images were written instead of doing a dump, you should
+be sure to use its `\-p' option when reading the files back. No matter
+how you restore a filesystem, be sure to unmount it and check its
+integrity with
+.Xr fsck (8)
+when the job is complete.
diff --git a/share/doc/smm/01.setup/3.t b/share/doc/smm/01.setup/3.t
new file mode 100644
index 0000000..b0f0c2b
--- /dev/null
+++ b/share/doc/smm/01.setup/3.t
@@ -0,0 +1,2001 @@
+.\" Copyright (c) 1980, 1986, 1988, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds lq ``
+.ds rq ''
+.ds RH "Upgrading a \*(Ps System
+.ds CF \*(Dy
+.Sh 1 "Upgrading a \*(Ps system"
+.PP
+This section describes the procedure for upgrading a \*(Ps
+system to \*(4B. This procedure may vary according to the version of
+the system running before conversion.
+If you are converting from a
+System V system, some of this section will still apply (in particular,
+the filesystem conversion). However, many of the system configuration
+files are different, and the executable file formats are completely
+incompatible.
+.PP
+In particular be wary when using this information to upgrade
+a \*(Ps HP300 system.
+There are at least four different versions of ``\*(Ps'' out there:
+.IP 1)
+HPBSD 1.x from Utah.
+.br
+This was the original version of \*(Ps for HP300s from which the
+other variants (and \*(4B) are derived.
+It is largely a \*(Ps system with Sun's NFS 3.0 filesystem code and
+some \*(Ps-Tahoe features (e.g. networking code).
+Since the filesystem code is 4.2/4.3 vintage and the filesystem
+hierarchy is largely \*(Ps, most of this section should apply.
+.IP 2)
+MORE/bsd from Mt. Xinu.
+.br
+This is a \*(Ps-Tahoe vintage system with Sun's NFS 4.0 filesystem code
+upgraded with Tahoe UFS features.
+The instructions for \*(Ps-Tahoe should largely apply.
+.IP 3)
+\*(Ps-Reno from CSRG.
+.br
+At least one site bootstrapped HP300 support from the Reno distribution.
+The Reno filesystem code was somewhere between \*(Ps and \*(4B: the VFS switch
+had been added but many of the UFS features (e.g. ``inline'' symlinks)
+were missing.
+The filesystem hierarchy reorganization first appeared in this release.
+Be extremely careful following these instructions if you are
+upgrading from the Reno distribution.
+.IP 4)
+HPBSD 2.0 from Utah.
+.br
+As if things were not bad enough already,
+this release has the \*(4B filesystem and networking code
+as well as some utilities, but still has a \*(Ps hierarchy.
+No filesystem conversions are necessary for this upgrade,
+but files will still need to be moved around.
+.Sh 2 "Installation overview"
+.PP
+If you are running \*(Ps, upgrading your system
+involves replacing your kernel and system utilities.
+In general, there are three possible ways to install a new \*(Bs distribution:
+(1) boot directly from the distribution tape, use it to load new binaries
+onto empty disks, and then merge or restore any existing configuration files
+and filesystems;
+(2) use an existing \*(Ps or later system to extract the root and
+.Pn /usr
+filesystems from the distribution tape,
+boot from the new system, then merge or restore existing
+configuration files and filesystems; or
+(3) extract the sources from the distribution tape onto an existing system,
+and use that system to cross-compile and install \*(4B.
+For this release, the second alternative is strongly advised,
+with the third alternative reserved as a last resort.
+In general, older binaries will continue to run under \*(4B,
+but there are many exceptions that are on the critical path
+for getting the system running.
+Ideally, the new system binaries (root and
+.Pn /usr
+filesystems) should be installed on spare disk partitions,
+then site-specific files should be merged into them.
+Once the new system is up and fully merged, the previous root and
+.Pn /usr
+filesystems can be reused.
+Other existing filesystems can be retained and used,
+except that (as usual) the new
+.Xr fsck
+should be run before they are mounted.
+.PP
+It is \fBSTRONGLY\fP advised that you make full dumps of each filesystem
+before beginning, especially any that you intend to modify in place
+during the merge.
+It is also desirable to run filesystem checks
+of all filesystems to be converted to \*(4B before shutting down.
+This is an excellent time to review your disk configuration
+for possible tuning of the layout.
+Most systems will need to provide a new filesystem for system use
+mounted on
+.Pn /var
+(see below).
+However, the
+.Pn /tmp
+filesystem can be an MFS virtual-memory-resident filesystem,
+potentially freeing an existing disk partition.
+(Additional swap space may be desirable as a consequence.)
+See
+.Xr mount_mfs (8).
+.PP
+The recommended installation procedure includes the following steps.
+The order of these steps will probably vary according to local needs.
+.IP \(bu
+Extract root and
+.Pn /usr
+filesystems from the distribution tapes.
+.IP \(bu
+Extract kernel and/or user-level sources from the distribution tape
+if space permits.
+This can serve as the backup documentation as needed.
+.IP \(bu
+Configure and boot a kernel for the local system.
+This can be delayed if the generic kernel from the distribution
+supports enough hardware to proceed.
+.IP \(bu
+Build a skeletal
+.Pn /var
+filesystem (see
+.Xr mtree (8)).
+.IP \(bu
+Merge site-dependent configuration files from
+.Pn /etc
+and
+.Pn /usr/lib
+into the new
+.Pn /etc
+directory.
+Note that many file formats and contents have changed; see section 3.4
+of this document.
+.IP \(bu
+Copy or merge files from
+.Pn /usr/adm ,
+.Pn /usr/spool ,
+.Pn /usr/preserve ,
+.Pn /usr/lib ,
+and other locations into
+.Pn /var .
+.IP \(bu
+Merge local macros, dictionaries, etc. into
+.Pn /usr/share .
+.IP \(bu
+Merge and update local software to reflect the system changes.
+.IP \(bu
+Take off the rest of the morning, you've earned it!
+.PP
+Section 3.2 lists the files to be saved as part of the conversion process.
+Section 3.3 describes the bootstrap process.
+Section 3.4 discusses the merger of the saved files back into the new system.
+Section 3.5 gives an overview of the major
+bug fixes and changes between \*(Ps and \*(4B.
+Section 3.6 provides general hints on possible problems to be
+aware of when converting from \*(Ps to \*(4B.
+.Sh 2 "Files to save"
+.PP
+The following list enumerates the standard set of files you will want to
+save and suggests directories in which site-specific files should be present.
+This list will likely be augmented with non-standard files you
+have added to your system.
+If you do not have enough space to create parallel
+filesystems, you should create a
+.Xr tar
+image of the following files before the new filesystems are created.
+The rest of this subsection describes where these files
+have moved and how they have changed.
+.TS
+lfC c l.
+/.cshrc \(dg root csh startup script (moves to \f(CW/root/.cshrc\fP)
+/.login \(dg root csh login script (moves to \f(CW/root/.login\fP)
+/.profile \(dg root sh startup script (moves to \f(CW/root/.profile\fP)
+/.rhosts \(dg for trusted machines and users (moves to \f(CW/root/.rhosts\fP)
+/etc/disktab \(dd in case you changed disk partition sizes
+/etc/fstab * disk configuration data
+/etc/ftpusers \(dg for local additions
+/etc/gettytab \(dd getty database
+/etc/group * group data base
+/etc/hosts \(dg for local host information
+/etc/hosts.equiv \(dg for local host equivalence information
+/etc/hosts.lpd \(dg printer access file
+/etc/inetd.conf * Internet services configuration data
+/etc/named* \(dg named configuration files
+/etc/netstart \(dg network initialization
+/etc/networks \(dg for local network information
+/etc/passwd * user data base
+/etc/printcap * line printer database
+/etc/protocols \(dd in case you added any local protocols
+/etc/rc * for any local additions
+/etc/rc.local * site specific system startup commands
+/etc/remote \(dg auto-dialer configuration
+/etc/services \(dd for local additions
+/etc/shells \(dd list of valid shells
+/etc/syslog.conf * system logger configuration
+/etc/securettys * merged into ttys
+/etc/ttys * terminal line configuration data
+/etc/ttytype * merged into ttys
+/etc/termcap \(dd for any local entries that may have been added
+/lib \(dd for any locally developed language processors
+/usr/dict/* \(dd for local additions to words and papers
+/usr/include/* \(dd for local additions
+/usr/lib/aliases * mail forwarding data base (moves to \f(CW/etc/aliases\fP)
+/usr/lib/crontab * cron daemon data base (moves to \f(CW/etc/crontab\fP)
+/usr/lib/crontab.local * local cron daemon data base (moves to \f(CW/etc/crontab.local\fP)
+/usr/lib/lib*.a \(dg for local libraries
+/usr/lib/mail.rc \(dg system-wide mail(1) initialization (moves to \f(CW/etc/mail.rc\fP)
+/usr/lib/sendmail.cf * sendmail configuration (moves to \f(CW/etc/sendmail.cf\fP)
+/usr/lib/tmac/* \(dd for locally developed troff/nroff macros (moves to \f(CW/usr/share/tmac/*\fP)
+/usr/lib/uucp/* \(dg for local uucp configuration files
+/usr/man/manl * for manual pages for locally developed programs (moves to \f(CW/usr/local/man\fP)
+/usr/spool/* \(dg for current mail, news, uucp files, etc. (moves to \f(CW/var/spool\fP)
+/usr/src/local \(dg for source for locally developed programs
+/sys/conf/HOST \(dg configuration file for your machine (moves to \f(CW/sys/<arch>/conf\fP)
+/sys/conf/files.HOST \(dg list of special files in your kernel (moves to \f(CW/sys/<arch>/conf\fP)
+/*/quotas * filesystem quota files (moves to \f(CW/*/quotas.user\fP)
+.TE
+.DS
+\(dg\|Files that can be used from \*(Ps without change.
+\(dd\|Files that need local changes merged into \*(4B files.
+*\|Files that require special work to merge and are discussed in section 3.4.
+.DE
+.Sh 2 "Installing \*(4B"
+.PP
+The next step is to build a working \*(4B system.
+This can be done by following the steps in section 2 of
+this document for extracting the root and
+.Pn /usr
+filesystems from the distribution tape onto unused disk partitions.
+For the SPARC, the root filesystem dump on the tape could also be
+extracted directly.
+For the HP300 and DECstation, the raw disk image can be copied
+into an unused partition and this partition can then be dumped
+to create an image that can be restored.
+The exact procedure chosen will depend on the disk configuration
+and the number of suitable disk partitions that may be used.
+It is also desirable to run filesystem checks
+of all filesystems to be converted to \*(4B before shutting down.
+In any case, this is an excellent time to review your disk configuration
+for possible tuning of the layout.
+Section 2.5 and
+.Xr config (8)
+are required reading.
+.LP
+The filesystem in \*(4B has been reorganized in an effort to
+meet several goals:
+.IP 1)
+The root filesystem should be small.
+.IP 2)
+There should be a per-architecture centrally-shareable read-only
+.Pn /usr
+filesystem.
+.IP 3)
+Variable per-machine directories should be concentrated below
+a single mount point named
+.Pn /var .
+.IP 4)
+Site-wide machine independent shareable text files should be separated
+from architecture specific binary files and should be concentrated below
+a single mount point named
+.Pn /usr/share .
+.LP
+These goals are realized with the following general layouts.
+The reorganized root filesystem has the following directories:
+.TS
+lfC l.
+/etc (config files)
+/bin (user binaries needed when single-user)
+/sbin (root binaries needed when single-user)
+/local (locally added binaries used only by this machine)
+/tmp (mount point for memory based filesystem)
+/dev (local devices)
+/home (mount point for AMD)
+/var (mount point for per-machine variable directories)
+/usr (mount point for multiuser binaries and files)
+.TE
+.LP
+The reorganized
+.Pn /usr
+filesystem has the following directories:
+.TS
+lfC l.
+/usr/bin (user binaries)
+/usr/contrib (software contributed to \*(4B)
+/usr/games (binaries for games, score files in \f(CW/var\fP)
+/usr/include (standard include files)
+/usr/lib (lib*.a from old \f(CW/usr/lib\fP)
+/usr/libdata (databases from old \f(CW/usr/lib\fP)
+/usr/libexec (executables from old \f(CW/usr/lib\fP)
+/usr/local (locally added binaries used site-wide)
+/usr/old (deprecated binaries)
+/usr/sbin (root binaries)
+/usr/share (mount point for site-wide shared text)
+/usr/src (mount point for sources)
+.TE
+.LP
+The reorganized
+.Pn /usr/share
+filesystem has the following directories:
+.TS
+lfC l.
+/usr/share/calendar (various useful calendar files)
+/usr/share/dict (dictionaries)
+/usr/share/doc (\*(4B manual sources)
+/usr/share/games (games text files)
+/usr/share/groff_font (groff font information)
+/usr/share/man (typeset manual pages)
+/usr/share/misc (dumping ground for random text files)
+/usr/share/mk (templates for \*(4B makefiles)
+/usr/share/skel (template user home directory files)
+/usr/share/tmac (various groff macro packages)
+/usr/share/zoneinfo (information on time zones)
+.TE
+.LP
+The reorganized
+.Pn /var
+filesystem has the following directories:
+.TS
+lfC l.
+/var/account (accounting files, formerly \f(CW/usr/adm\fP)
+/var/at (\fIat\fP\|(1) spooling area)
+/var/backups (backups of system files)
+/var/crash (crash dumps)
+/var/db (system-wide databases, e.g. tags)
+/var/games (score files)
+/var/log (log files)
+/var/mail (users mail)
+/var/obj (hierarchy to build \f(CW/usr/src\fP)
+/var/preserve (preserve area for vi)
+/var/quotas (directory to store quota files)
+/var/run (directory to store *.pid files)
+/var/rwho (rwho databases)
+/var/spool/ftp (home directory for anonymous ftp)
+/var/spool/mqueue (sendmail spooling directory)
+/var/spool/news (news spooling area)
+/var/spool/output (printer spooling area)
+/var/spool/uucp (uucp spooling area)
+/var/tmp (disk-based temporary directory)
+/var/users (root of per-machine user home directories)
+.TE
+.PP
+The \*(4B bootstrap routines pass the identity of the boot device
+through to the kernel.
+The kernel then uses that device as its root filesystem.
+Thus, for example, if you boot from
+.Pn /dev/\*(Dk1a ,
+the kernel will use
+.Pn \*(Dk1a
+as its root filesystem. If
+.Pn /dev/\*(Dk1b
+is configured as a swap partition,
+it will be used as the initial swap area,
+otherwise the normal primary swap area (\c
+.Pn /dev/\*(Dk0b )
+will be used.
+The \*(4B bootstrap is backward compatible with \*(Ps,
+so you can replace your old bootstrap if you use it
+to boot your first \*(4B kernel.
+However, the \*(Ps bootstrap cannot access \*(4B filesystems,
+so if you plan to convert your filesystems to \*(4B,
+you must install a new bootstrap \fIbefore\fP doing the conversion.
+Note that SPARC users cannot build a \*(4B compatible version
+of the bootstrap, so must \fInot\fP convert their root filesystem
+to the new \*(4B format.
+.PP
+Once you have extracted the \*(4B system and booted from it,
+you will have to build a kernel customized for your configuration.
+If you have any local device drivers,
+they will have to be incorporated into the new kernel.
+See section 4.1.3 and ``Building 4.3BSD UNIX Systems with Config'' (SMM:2).
+.PP
+If converting from \*(Ps, your old filesystems should be converted.
+If you've modified the partition
+sizes from the original \*(Ps ones, and are not already using the
+\*(4B disk labels, you will have to modify the default disk partition
+tables in the kernel. Make the necessary table changes and boot
+your custom kernel \fBBEFORE\fP trying to access any of your old
+filesystems! After doing this, if necessary, the remaining filesystems
+may be converted in place by running the \*(4B version of
+.Xr fsck (8)
+on each filesystem and allowing it to make the necessary corrections.
+The new version of
+.Xr fsck
+is more strict about the size of directories than
+the version supplied with \*(Ps.
+Thus the first time that it is run on a \*(Ps filesystem,
+it will produce messages of the form:
+.DS
+\fBDIRECTORY ...: LENGTH\fP xx \fBNOT MULTIPLE OF 512 (ADJUSTED)\fP
+.DE
+Length ``xx'' will be the size of the directory;
+it will be expanded to the next multiple of 512 bytes.
+The new
+.Xr fsck
+will also set default \fIinterleave\fP and
+\fInpsect\fP (number of physical sectors per track) values on older
+filesystems, in which these fields were unused spares; this correction
+will produce messages of the form:
+.DS
+\fBIMPOSSIBLE INTERLEAVE=0 IN SUPERBLOCK (SET TO DEFAULT)\fP\**
+\fBIMPOSSIBLE NPSECT=0 IN SUPERBLOCK (SET TO DEFAULT)\fP
+.DE
+.FS
+The defaults are to set \fIinterleave\fP to 1 and
+\fInpsect\fP to \fInsect\fP.
+This is correct on most drives;
+it affects only performance (usually virtually unmeasurably).
+.FE
+Filesystems that have had their interleave and npsect values
+set will be diagnosed by the old
+.Xr fsck
+as having a bad superblock; the old
+.Xr fsck
+will run only if given an alternate superblock
+(\fIfsck \-b32\fP),
+in which case it will re-zero these fields.
+The \*(4B kernel will internally set these fields to their defaults
+if fsck has not done so; again, the \fI\-b32\fP option may be
+necessary for running the old
+.Xr fsck .
+.PP
+In addition, \*(4B removes several limits on filesystem sizes
+that were present in \*(Ps.
+The limited filesystems
+continue to work in \*(4B, but should be converted
+as soon as it is convenient
+by running
+.Xr fsck
+with the \fI\-c 2\fP option.
+The sequence \fIfsck \-p \-c 2\fP will update them all,
+fix the interleave and npsect fields,
+fix any incorrect directory lengths,
+expand maximum uid's and gid's to 32 bits,
+place symbolic links less than 60 bytes into their inode,
+and fill in directory type fields all at once.
+The new filesystem formats are incompatible with older systems.
+If you wish to continue using these filesystems with the older
+systems you should make only the compatible changes using
+\fIfsck \-c 1\fP.
+.Sh 2 "Merging your files from \*(Ps into \*(4B"
+.PP
+When your system is booting reliably and you have the \*(4B root and
+.Pn /usr
+filesystems fully installed you will be ready
+to continue with the next step in the conversion process,
+merging your old files into the new system.
+.PP
+If you saved the files on a
+.Xr tar
+tape, extract them into a scratch directory, say
+.Pn /usr/convert :
+.DS
+\fB#\fP \fImkdir /usr/convert\fP
+\fB#\fP \fIcd /usr/convert\fP
+\fB#\fP \fItar xp\fP
+.DE
+.PP
+The data files marked in the previous table with a dagger (\(dg)
+may be used without change from the previous system.
+Those data files marked with a double dagger (\(dd) have syntax
+changes or substantial enhancements.
+You should start with the \*(4B version and carefully
+integrate any local changes into the new file.
+Usually these local changes can be incorporated
+without conflict into the new file;
+some exceptions are noted below.
+The files marked with an asterisk (*) require
+particular attention and are discussed below.
+.PP
+As described in section 3.3,
+the most immediately obvious change in \*(4B is the reorganization
+of the system filesystems.
+Users of certain recent vendor releases have seen this general organization,
+although \*(4B takes the reorganization a bit further.
+The directories most affected are
+.Pn /etc ,
+that now contains only system configuration files;
+.Pn /var ,
+a new filesystem containing per-system spool and log files; and
+.Pn /usr/share ,
+that contains most of the text files shareable across architectures
+such as documentation and macros.
+System administration programs formerly in
+.Pn /etc
+are now found in
+.Pn /sbin
+and
+.Pn /usr/sbin .
+Various programs and data files formerly in
+.Pn /usr/lib
+are now found in
+.Pn /usr/libexec
+and
+.Pn /usr/libdata ,
+respectively.
+Administrative files formerly in
+.Pn /usr/adm
+are in
+.Pn /var/account
+and, similarly, log files are now in
+.Pn /var/log .
+The directory
+.Pn /usr/ucb
+has been merged into
+.Pn /usr/bin ,
+and the sources for programs in
+.Pn /usr/bin
+are in
+.Pn /usr/src/usr.bin .
+Other source directories parallel the destination directories;
+.Pn /usr/src/etc
+has been greatly expanded, and
+.Pn /usr/src/share
+is new.
+The sources for the manual pages are, in general, kept with the source
+code for the applications they document.
+Manual pages not closely corresponding to an application program
+are found in
+.Pn /usr/src/share/man .
+The locations of all man pages are listed in
+.Pn /usr/src/share/man/man0/man[1-8] .
+The manual page
+.Xr hier (7)
+has been updated and made more detailed;
+it is included in the printed documentation.
+You should review it to familiarize yourself with the new layout.
+.PP
+A new utility,
+.Xr mtree (8),
+is provided to build and check filesystem hierarchies
+with the proper contents, owners and permissions.
+Scripts are provided in
+.Pn /etc/mtree
+(and
+.Pn /usr/src/etc/mtree )
+for the root,
+.Pn /usr
+and
+.Pn /var
+filesystems.
+Once a filesystem has been made for
+.Pn /var ,
+.Xr mtree
+can be used to create a directory hierarchy there
+or you can simply use tar to extract the prototype from
+the second file of the distribution tape.
+.Sh 3 "Changes in the \f(CW/etc\fP directory"
+.PP
+The
+.Pn /etc
+directory now contains nearly all the host-specific configuration
+files.
+Note that some file formats have changed,
+and those configuration files containing pathnames are nearly all affected
+by the reorganization.
+See the examples provided in
+.Pn /etc
+(installed from
+.Pn /usr/src/etc )
+as a guide.
+The following table lists some of the local configuration files
+whose locations and/or contents have changed.
+.TS
+l l l
+lfC lfC l.
+\*(Ps and Earlier \*(4B Comments
+_ _ _
+/etc/fstab /etc/fstab new format; see below
+/etc/inetd.conf /etc/inetd.conf pathnames of executables changed
+/etc/printcap /etc/printcap pathnames changed
+/etc/syslog.conf /etc/syslog.conf pathnames of log files changed
+/etc/ttys /etc/ttys pathnames of executables changed
+/etc/passwd /etc/master.passwd new format; see below
+/usr/lib/sendmail.cf /etc/sendmail.cf changed pathnames
+/usr/lib/aliases /etc/aliases may contain changed pathnames
+/etc/*.pid /var/run/*.pid
+
+.T&
+l l l
+lfC lfC l.
+New in \*(Ps-Tahoe \*(4B Comments
+_ _ _
+/usr/games/dm.config /etc/dm.conf configuration for games (see \fIdm\fP\|(8))
+/etc/zoneinfo/localtime /etc/localtime timezone configuration
+/etc/zoneinfo /usr/share/zoneinfo timezone configuration
+.TE
+.ne 1.5i
+.TS
+l l l
+lfC lfC l.
+ New in \*(4B Comments
+_ _ _
+ /etc/aliases.db database version of the aliases file
+ /etc/amd-home location database of home directories
+ /etc/amd-vol location database of exported filesystems
+ /etc/changelist \f(CW/etc/security\fP files to back up
+ /etc/csh.cshrc system-wide csh(1) initialization file
+ /etc/csh.login system-wide csh(1) login file
+ /etc/csh.logout system-wide csh(1) logout file
+ /etc/disklabels directory for saving disklabels
+ /etc/exports NFS list of export permissions
+ /etc/ftpwelcome message displayed for ftp users; see ftpd(8)
+ /etc/kerberosIV Kerberos directory; see below
+ /etc/man.conf lists directories searched by \fIman\fP\|(1)
+ /etc/mtree directory for local mtree files; see mtree(8)
+ /etc/netgroup NFS group list used in \f(CW/etc/exports\fP
+ /etc/pwd.db non-secure hashed user data base file
+ /etc/spwd.db secure hashed user data base file
+ /etc/security daily system security checker
+.TE
+.PP
+System security changes require adding several new ``well-known'' groups to
+.Pn /etc/group .
+The groups that are needed by the system as distributed are:
+.TS
+l n l.
+name number purpose
+_
+wheel 0 users allowed superuser privilege
+daemon 1 processes that need less than wheel privilege
+kmem 2 read access to kernel memory
+sys 3 access to kernel sources
+tty 4 access to terminals
+operator 5 read access to raw disks
+bin 7 group for system binaries
+news 8 group for news
+wsrc 9 write access to sources
+games 13 access to games
+staff 20 system staff
+guest 31 system guests
+nobody 39 the least privileged group
+utmp 45 access to utmp files
+dialer 117 access to remote ports and dialers
+.TE
+Only users in the ``wheel'' group are permitted to
+.Xr su
+to ``root''.
+Most programs that manage directories in
+.Pn /var/spool
+now run set-group-id to ``daemon'' so that users cannot
+directly access the files in the spool directories.
+The special files that access kernel memory,
+.Pn /dev/kmem
+and
+.Pn /dev/mem ,
+are made readable only by group ``kmem''.
+Standard system programs that require this access are
+made set-group-id to that group.
+The group ``sys'' is intended to control access to kernel sources,
+and other sources belong to group ``wsrc.''
+Rather than make user terminals writable by all users,
+they are now placed in group ``tty'' and made only group writable.
+Programs that should legitimately have access to write on user terminals
+such as
+.Xr talkd
+and
+.Xr write
+now run set-group-id to ``tty''.
+The ``operator'' group controls access to disks.
+By default, disks are readable by group ``operator'',
+so that programs such as
+.Xr dump
+can access the filesystem information without being set-user-id to ``root''.
+The
+.Xr shutdown (8)
+program is executable only by group operator
+and is setuid to root so that members of group operator may shut down
+the system without root access.
+.PP
+The ownership and modes of some directories have changed.
+The
+.Xr at
+programs now run set-user-id ``root'' instead of ``daemon.''
+Also, the uucp directory no longer needs to be publicly writable,
+as
+.Xr tip
+reverts to privileged status to remove its lock files.
+After copying your version of
+.Pn /var/spool ,
+you should do:
+.DS
+\fB#\fP \fIchown \-R root /var/spool/at\fP
+\fB#\fP \fIchown \-R uucp.daemon /var/spool/uucp\fP
+\fB#\fP \fIchmod \-R o\-w /var/spool/uucp\fP
+.DE
+.PP
+The format of the cron table,
+.Pn /etc/crontab ,
+has been changed to specify the user-id that should be used to run a process.
+The user-id ``nobody'' is frequently useful for non-privileged programs.
+Local changes are now put in a separate file,
+.Pn /etc/crontab.local .
+.PP
+Some of the commands previously in
+.Pn /etc/rc.local
+have been moved to
+.Pn /etc/rc ;
+several new functions are now handled by
+.Pn /etc/rc ,
+.Pn /etc/netstart
+and
+.Pn /etc/rc.local .
+You should look closely at the prototype version of these files
+and read the manual pages for the commands contained in them
+before trying to merge your local copy.
+Note in particular that
+.Xr ifconfig
+has had many changes,
+and that host names are now fully specified as domain-style names
+(e.g., vangogh.CS.Berkeley.EDU) for the benefit of the name server.
+.PP
+Some of the commands previously in
+.Pn /etc/daily
+have been moved to
+.Pn /etc/security ,
+and several new functions have been added to
+.Pn /etc/security
+to do nightly security checks on the system.
+The script
+.Pn /etc/daily
+runs
+.Pn /etc/security
+each night, and mails the output to the super-user.
+Some of the checks done by
+.Pn /etc/security
+are:
+.DS
+\(bu Syntax errors in the password and group files.
+\(bu Duplicate user and group names and id's.
+\(bu Dangerous search paths and umask values for the superuser.
+\(bu Dangerous values in various initialization files.
+\(bu Dangerous .rhosts files.
+\(bu Dangerous directory and file ownership or permissions.
+\(bu Globally exported filesystems.
+\(bu Dangerous owners or permissions for special devices.
+.DE
+In addition, it reports any changes to setuid and setgid files, special
+devices, or the files in
+.Pn /etc/changelist
+since the last run of
+.Pn /etc/security .
+Backup copies of the files are saved in
+.Pn /var/backups .
+Finally, the system binaries are checksummed and their permissions
+validated against the
+.Xr mtree (8)
+specifications in
+.Pn /etc/mtree .
+.PP
+The C-library and system binaries on the distribution tape
+are compiled with new versions of
+.Xr gethostbyname
+and
+.Xr gethostbyaddr
+that use the name server,
+.Xr named (8).
+If you have only a small network and are not connected
+to a large network, you can use the distributed library routines without
+any problems; they use a linear scan of the host table
+.Pn /etc/hosts
+if the name server is not running.
+If you are on the Internet or have a large local network,
+it is recommended that you set up
+and use the name server.
+For instructions on how to set up the necessary configuration files,
+refer to ``Name Server Operations Guide for BIND'' (SMM:10).
+Several programs rely on the host name returned by
+.Xr gethostname
+to determine the local domain name.
+.PP
+If you are using the name server, your
+.Xr sendmail
+configuration file will need some updates to accommodate it.
+See the ``Sendmail Installation and Operation Guide'' (SMM:8) and
+the sample
+.Xr sendmail
+configuration files in
+.Pn /usr/src/usr.sbin/sendmail/cf .
+The aliases file,
+.Pn /etc/aliases ,
+has also been changed to add certain well-known addresses.
+.Sh 3 "Shadow password files"
+.PP
+The password file format adds change and expiration fields
+and its location has changed to protect
+the encrypted passwords stored there.
+The actual password file is now stored in
+.Pn /etc/master.passwd .
+The hashed dbm password files do not contain encrypted passwords,
+but contain the file offset to the entry with the password in
+.Pn /etc/master.passwd
+(that is readable only by root).
+Thus, the
+.Fn getpwnam
+and
+.Fn getpwuid
+functions will no longer return an encrypted password string to non-root
+callers.
+An old-style passwd file is created in
+.Pn /etc/passwd
+by the
+.Xr vipw (8)
+and
+.Xr pwd_mkdb (8)
+programs.
+See also
+.Xr passwd (5).
+.PP
+Several new users have also been added to the group of ``well-known'' users in
+.Pn /etc/passwd .
+The current list is:
+.DS
+.TS
+l c.
+name number
+_
+root 0
+daemon 1
+operator 2
+bin 3
+games 7
+uucp 66
+nobody 32767
+.TE
+.DE
+The ``daemon'' user is used for daemon processes that
+do not need root privileges.
+The ``operator'' user-id is used as an account for dumpers
+so that they can log in without having the root password.
+By placing them in the ``operator'' group,
+they can get read access to the disks.
+The ``uucp'' login has existed long before \*(4B,
+and is noted here just to provide a common user-id.
+The password entry ``nobody'' has been added to specify
+the user with least privilege. The ``games'' user is a pseudo-user
+that controls access to game programs.
+.PP
+After installing your updated password file, you must run
+.Xr pwd_mkdb (8)
+to create the password database.
+Note that
+.Xr pwd_mkdb (8)
+is run whenever
+.Xr vipw (8)
+is run.
+.Sh 3 "The \f(CW/var\fP filesystem"
+.PP
+The spooling directories saved on tape may be restored in their
+eventual resting places without too much concern. Be sure to
+use the `\-p' option to
+.Xr tar (1)
+so that files are recreated with the same file modes.
+The following commands provide a guide for copying spool and log files from
+an existing system into a new
+.Pn /var
+filesystem.
+At least the following directories should already exist on
+.Pn /var :
+.Pn output ,
+.Pn log ,
+.Pn backups
+and
+.Pn db .
+.LP
+.DS
+.ft CW
+SRC=/oldroot/usr
+
+cd $SRC; tar cf - msgs preserve | (cd /var && tar xpf -)
+.DE
+.DS
+.ft CW
+# copy $SRC/spool to /var
+cd $SRC/spool
+tar cf - at mail rwho | (cd /var && tar xpf -)
+tar cf - ftp mqueue news secretmail uucp uucppublic | \e
+ (cd /var/spool && tar xpf -)
+.DE
+.DS
+.ft CW
+# everything else in spool is probably a printer area
+mkdir .save
+mv at ftp mail mqueue news rwho secretmail uucp uucppublic .save
+tar cf - * | (cd /var/spool/output && tar xpf -)
+mv .save/* .
+rmdir .save
+.DE
+.DS
+.ft CW
+cd /var/spool/mqueue
+mv syslog.7 /var/log/maillog.7
+mv syslog.6 /var/log/maillog.6
+mv syslog.5 /var/log/maillog.5
+mv syslog.4 /var/log/maillog.4
+mv syslog.3 /var/log/maillog.3
+mv syslog.2 /var/log/maillog.2
+mv syslog.1 /var/log/maillog.1
+mv syslog.0 /var/log/maillog.0
+mv syslog /var/log/maillog
+.DE
+.DS
+.ft CW
+# move $SRC/adm to /var
+cd $SRC/adm
+tar cf - . | (cd /var/account && tar xpf -)
+cd /var/account
+rm -f msgbuf
+mv messages messages.[0-9] ../log
+mv wtmp wtmp.[0-9] ../log
+mv lastlog ../log
+.DE
+.Sh 2 "Bug fixes and changes between \*(Ps and \*(4B"
+.PP
+The major new facilities available in the \*(4B release are
+a new virtual memory system,
+the addition of ISO/OSI networking support,
+a new virtual filesystem interface supporting filesystem stacking,
+a freely redistributable implementation of NFS,
+a log-structured filesystem,
+enhancement of the local filesystems to support
+files and filesystems that are up to 2^63 bytes in size,
+enhanced security and system management support,
+and the conversion to and addition of the IEEE Std1003.1 (``POSIX'')
+facilities and many of the IEEE Std1003.2 facilities.
+In addition, many new utilities and additions to the C
+library are present as well.
+The kernel sources have been reorganized to collect all machine-dependent
+files for each architecture under one directory,
+and most of the machine-independent code is now free of code
+conditional on specific machines.
+The user structure and process structure have been reorganized
+to eliminate the statically-mapped user structure and to make most
+of the process resources shareable by multiple processes.
+The system and include files have been converted to be compatible
+with ANSI C, including function prototypes for most of the exported
+functions.
+There are numerous other changes throughout the system.
+.Sh 3 "Changes to the kernel"
+.PP
+This release includes several important structural kernel changes.
+The kernel uses a new internal system call convention;
+the use of global (``u-dot'') variables for parameters and error returns
+has been eliminated,
+and interrupted system calls no longer abort using non-local goto's (longjmp's).
+A new sleep interface separates signal handling from scheduling priority,
+returning characteristic errors to abort or restart the current system call.
+This sleep call also passes a string describing the process state,
+that is used by the ps(1) program.
+The old sleep interface can be used only for non-interruptible sleeps.
+The new sleep interface (\fItsleep\fP) can be used at any priority,
+but is only interruptible if the PCATCH flag is set.
+When interrupted, \fItsleep\fP returns EINTR or ERESTART.
+.PP
+Many data structures that were previously statically allocated
+are now allocated dynamically.
+These structures include mount entries, file entries,
+user open file descriptors, the process entries, the vnode table,
+the name cache, and the quota structures.
+.PP
+To protect against indiscriminate reading or writing of kernel
+memory, all writing and most reading of kernel data structures
+must be done using a new ``sysctl'' interface.
+The information to be accessed is described through an extensible
+``Management Information Base'' (MIB) style name,
+described as a dotted set of components.
+A new utility,
+.Xr sysctl (8),
+retrieves kernel state and allows processes with appropriate
+privilege to set kernel state.
+.Sh 3 "Security"
+.PP
+The kernel runs with four different levels of security.
+Any superuser process can raise the security level, but only
+.Xr init (8)
+can lower it.
+Security levels are defined as follows:
+.IP \-1
+Permanently insecure mode \- always run system in level 0 mode.
+.IP " 0"
+Insecure mode \- immutable and append-only flags may be turned off.
+All devices may be read or written subject to their permissions.
+.IP " 1"
+Secure mode \- immutable and append-only flags may not be cleared;
+disks for mounted filesystems,
+.Pn /dev/mem ,
+and
+.Pn /dev/kmem
+are read-only.
+.IP " 2"
+Highly secure mode \- same as secure mode, plus disks are always
+read-only whether mounted or not.
+This level precludes tampering with filesystems by unmounting them,
+but also inhibits running
+.Xr newfs (8)
+while the system is multi-user.
+See
+.Xr chflags (1)
+and the \-\fBo\fP option to
+.Xr ls (1)
+for information on setting and displaying the immutable and append-only
+flags.
+.PP
+Normally, the system runs in level 0 mode while single user
+and in level 1 mode while multiuser.
+If the level 2 mode is desired while running multiuser,
+it can be set in the startup script
+.Pn /etc/rc
+using
+.Xr sysctl (8).
+If it is desired to run the system in level 0 mode while multiuser,
+the administrator must build a kernel with the variable
+.Li securelevel
+in the kernel source file
+.Pn /sys/kern/kern_sysctl.c
+initialized to \-1.
+.Sh 4 "Virtual memory changes"
+.PP
+The new virtual memory implementation is derived from the Mach
+operating system developed at Carnegie-Mellon,
+and was ported to the BSD kernel at the University of Utah.
+It is based on the 2.0 release of Mach
+(with some bug fixes from the 2.5 and 3.0 releases)
+and retains many of its essential features such as
+the separation of the machine dependent and independent layers
+(the ``pmap'' interface),
+efficient memory utilization using copy-on-write
+and other lazy-evaluation techniques,
+and support for large, sparse address spaces.
+It does not include the ``external pager'' interface, instead using
+a primitive internal pager interface.
+The Mach virtual memory system call interface has been replaced with the
+``mmap''-based interface described in the ``Berkeley Software
+Architecture Manual'' (see UNIX Programmer's Manual,
+Supplementary Documents, PSD:5).
+The interface is similar to the interfaces shipped
+by several commercial vendors such as Sun, USL, and Convex Computer Corp.
+The integration of the new virtual memory is functionally complete,
+but still has serious performance problems under heavy memory load.
+The internal kernel interfaces have not yet been completed
+and the memory pool and buffer cache have not been merged.
+Some additional caveats:
+.IP \(bu
+Since the code is based on the 2.0 release of Mach,
+bugs and misfeatures of the BSD version should not be considered
+short-comings of the current Mach virtual memory system.
+.IP \(bu
+Because of the disjoint virtual memory (page) and IO (buffer) caches,
+it is possible to see inconsistencies if using both the mmap and
+read/write interfaces on the same file simultaneously.
+.IP \(bu
+Swap space is allocated on-demand rather than up front and no
+allocation checks are performed so it is possible to over-commit
+memory and eventually deadlock.
+.IP \(bu
+The semantics of the
+.Xr vfork (2)
+system call are slightly different.
+The synchronization between parent and child is preserved,
+but the memory sharing aspect is not.
+In practice this has been enough for backward compatibility,
+but newer code should just use
+.Xr fork (2).
+.Sh 4 "Networking additions and changes"
+.PP
+The ISO/OSI Networking consists of a kernel implementation of
+transport class 4 (TP-4),
+connectionless networking protocol (CLNP),
+and 802.3-based link-level support (hardware-compatible with Ethernet\**).
+.FS
+Ethernet is a trademark of the Xerox Corporation.
+.FE
+We also include support for ISO Connection-Oriented Network Service,
+X.25, and TP-0.
+The session and presentation layers are provided outside
+the kernel using the ISO Development Environment by Marshall Rose,
+that is available via anonymous FTP
+(but is not included on the distribution tape).
+Included in this development environment are file
+transfer and management (FTAM), virtual terminals (VT),
+a directory services implementation (X.500),
+and miscellaneous other utilities.
+.PP
+Kernel support for the ISO OSI protocols is enabled with the ISO option
+in the kernel configuration file.
+The
+.Xr iso (4)
+manual page describes the protocols and addressing;
+see also
+.Xr clnp (4),
+.Xr tp (4)
+and
+.Xr cltp (4).
+The OSI equivalent to ARP is ESIS (End System to Intermediate System Routing
+Protocol); running this protocol is mandatory; however, one can manually add
+translations for machines that do not participate by use of the
+.Xr route (8)
+command.
+Additional information is provided in the manual page describing
+.Xr esis (4).
+.PP
+The command
+.Xr route (8)
+has a new syntax and several new capabilities:
+it can install routes with a specified destination and mask,
+and can change route characteristics such as hop count, packet size
+and window size.
+.PP
+Several important enhancements have been added to the TCP/IP
+protocols including TCP header prediction and
+serial line IP (SLIP) with header compression.
+The routing implementation has been completely rewritten
+to use a hierarchical routing tree with a mask per route
+to support the arbitrary levels of routing found in the ISO protocols.
+The routing table also stores and caches route characteristics
+to speed the adaptation of the throughput and congestion avoidance
+algorithms.
+.PP
+The format of the
+.I sockaddr
+structure (the structure used to describe a generic network address with an
+address family and family-specific data)
+has changed from previous releases,
+as have the address family-specific versions of this structure.
+The
+.I sa_family
+field has been split into a length,
+.Pn sa_len ,
+and a family,
+.Pn sa_family .
+System calls that pass a
+.I sockaddr
+structure into the kernel (e.g.
+.Fn sendto
+and
+.Fn connect )
+have a separate parameter that specifies the
+.I sockaddr
+length, and thus it is not necessary to fill in the
+.I sa_len
+field for those system calls.
+System calls that pass a
+.I sockaddr
+structure back from the kernel (e.g.
+.Fn recvfrom
+and
+.Fn accept )
+receive a completely filled-in
+.I sockaddr
+structure, thus the length field is valid.
+Because this would not work for old binaries,
+the new library uses a different system call number.
+Thus, most networking programs compiled under \*(4B are incompatible
+with older systems.
+.PP
+Although this change is mostly source and binary compatible
+with old programs, there are three exceptions.
+Programs with statically initialized
+.I sockaddr
+structures
+(usually the Internet form, a
+.I sockaddr_in )
+are not compatible.
+Generally, such programs should be changed to fill in the structure
+at run time, as C allows no way to initialize a structure without
+assuming the order and number of fields.
+Also, programs that use structures to describe a network packet format
+that contain embedded
+.I sockaddr
+structures require change; a definition of an
+.I osockaddr
+structure is provided for this purpose.
+Finally, programs that use the
+.Sm SIOCGIFCONF
+ioctl to get a complete list of interface addresses
+need to check the
+.I sa_len
+field when iterating through the array of addresses returned,
+as not all the structures returned have the same length
+(this variance in length is nearly guaranteed by the presence of link-layer
+address structures).
+.Sh 4 "Additions and changes to filesystems"
+.PP
+The \*(4B distribution contains most of the interfaces
+specified in the IEEE Std1003.1 system interface standard.
+Filesystem additions include IEEE Std1003.1 FIFOs,
+byte-range file locking, and saved user and group identifiers.
+.PP
+A new virtual filesystem interface has been added to the
+kernel to support multiple filesystems.
+In comparison with other interfaces,
+the Berkeley interface has been structured for more efficient support
+of filesystems that maintain state (such as the local filesystem).
+The interface has been extended with support for stackable
+filesystems done at UCLA.
+These extensions allow for filesystems to be layered on top of each
+other and allow new vnode operations to be added without requiring
+changes to existing filesystem implementations.
+For example,
+the umap filesystem (see
+.Xr mount_umap (8))
+is used to mount a sub-tree of an existing filesystem
+that uses a different set of uids and gids than the local system.
+Such a filesystem could be mounted from a remote site via NFS or it
+could be a filesystem on removable media brought from some foreign
+location that uses a different password file.
+.PP
+Other new filesystems that may be stacked include the loopback filesystem
+.Xr mount_lofs (8),
+the kernel filesystem
+.Xr mount_kernfs (8),
+and the portal filesystem
+.Xr mount_portal (8).
+.PP
+The buffer cache in the kernel is now organized as a file block cache
+rather than a device block cache.
+As a consequence, cached blocks from a file
+and from the corresponding block device would no longer be kept consistent.
+The block device thus has little remaining value.
+Three changes have been made for these reasons:
+.IP 1)
+block devices may not be opened while they are mounted,
+and may not be mounted while open, so that the two versions of cached
+file blocks cannot be created,
+.IP 2)
+filesystem checks of the root now use the raw device
+to access the root filesystem, and
+.IP 3)
+the root filesystem is initially mounted read-only
+so that nothing can be written back to disk during or after change to
+the raw filesystem by
+.Xr fsck .
+.LP
+The root filesystem may be made writable while in single-user mode
+with the command:
+.DS
+.ft CW
+mount \-uw /
+.DE
+The mount command has an option to update the flags on a mounted filesystem,
+including the ability to upgrade a filesystem from read-only to read-write
+or downgrade it from read-write to read-only.
+.PP
+In addition to the local ``fast filesystem'',
+we have added an implementation of the network filesystem (NFS)
+that fully interoperates with the NFS shipped by Sun and its licensees.
+Because our NFS implementation was implemented
+by Rick Macklem of the University of Guelph
+using only the publicly available NFS specification,
+it does not require a license from Sun to use in source or binary form.
+By default it runs over UDP to be compatible with Sun's implementation.
+However, it can be configured on a per-mount basis to run over TCP.
+Using TCP allows it to be used quickly and efficiently through
+gateways and over long-haul networks.
+Using an extended protocol, it supports Leases to allow a limited
+callback mechanism that greatly reduces the network traffic necessary
+to maintain cache consistency between the server and its clients.
+Its use will be familiar to users of other implementations of NFS.
+See the manual pages
+.Xr mount (8),
+.Xr mountd (8),
+.Xr fstab (5),
+.Xr exports (5),
+.Xr netgroup (5),
+.Xr nfsd (8),
+.Xr nfsiod (8),
+and
+.Xr nfssvc (8)
+and the document ``The 4.4BSD NFS Implementation'' (SMM:6)
+for further information.
+The format of
+.Pn /etc/fstab
+has changed from previous \*(Bs releases
+to a blank-separated format to allow colons in pathnames.
+.PP
+A new local filesystem, the log-structured filesystem (LFS),
+has been added to the system.
+It provides near disk-speed output and fast crash recovery.
+This work is based, in part, on the LFS filesystem created
+for the Sprite operating system at Berkeley.
+While the kernel implementation is almost complete,
+only some of the utilities to support the
+filesystem have been written,
+so we do not recommend it for production use.
+See
+.Xr newlfs (8),
+.Xr mount_lfs (8)
+and
+.Xr lfs_cleanerd (8)
+for more information.
+For an in-depth description of the implementation and performance
+characteristics of log-structured filesystems in general,
+and this one in particular, see Dr. Margo Seltzer's doctoral thesis,
+available from the University of California Computer Science Department.
+.PP
+We have also added a memory-based filesystem that runs in
+pageable memory, allowing large temporary filesystems without
+requiring dedicated physical memory.
+.PP
+The local ``fast filesystem'' has been enhanced to do
+clustering that allows large pieces of files to be
+allocated contiguously resulting in near doubling
+of filesystem throughput.
+The filesystem interface has been extended to allow
+files and filesystems to grow to 2^63 bytes in size.
+The quota system has been rewritten to support both
+user and group quotas (simultaneously if desired).
+Quota expiration is based on time rather than
+the previous metric of number of logins over quota.
+This change makes quotas more useful on fileservers
+onto which users seldom log in.
+.PP
+The system security has been greatly enhanced by the
+addition of additional file flags that permit a file to be
+marked as immutable or append only.
+Once set, these flags can only be cleared by the super-user
+when the system is running in insecure mode (normally, single-user).
+In addition to the immutable and append-only flags,
+the filesystem supports a new user-settable flag ``nodump''.
+(File flags are set using the
+.Xr chflags (1)
+utility.)
+When set on a file,
+.Xr dump (8)
+will omit the file from incremental backups
+but retain it on full backups.
+See the ``-h'' flag to
+.Xr dump (8)
+for details on how to change this default.
+The ``nodump'' flag is usually set on core dumps,
+system crash dumps, and object files generated by the compiler.
+Note that the flag is not preserved when files are copied,
+so installing an object file will cause the installed copy to be backed up.
+.PP
+The filesystem format used in \*(4B has several additions.
+Directory entries have an additional field,
+.Pn d_type ,
+that identifies the type of the entry
+(normally found in the
+.Pn st_mode
+field of the
+.Pn stat
+structure).
+This field is particularly useful for identifying
+directories without the need to use
+.Xr stat (2).
+.PP
+Short (less than sixty byte) symbolic links are now stored
+in the inode itself rather than in a separate data block.
+This saves disk space and makes access of symbolic links faster.
+Short symbolic links are not given a special type,
+so a user-level application is unaware of their special treatment.
+Unlike pre-\*(4B systems, symbolic links do
+not have an owner, group, access mode, times, etc.
+Instead, these attributes are taken from the directory that contains the link.
+The only attributes returned from an
+.Xr lstat (2)
+that refer to the symbolic link itself are the file type (S_IFLNK),
+size, blocks, and link count (always 1).
+.PP
+An implementation of an auto-mounter daemon,
+.Xr amd ,
+was contributed by Jan-Simon Pendry of the
+Imperial College of Science, Technology & Medicine.
+See the document ``AMD \- The 4.4BSD Automounter'' (SMM:13)
+for further information.
+.PP
+The directory
+.Pn /dev/fd
+contains special files
+.Pn 0
+through
+.Pn 63
+that, when opened, duplicate the corresponding file descriptor.
+The names
+.Pn /dev/stdin ,
+.Pn /dev/stdout
+and
+.Pn /dev/stderr
+refer to file descriptors 0, 1 and 2.
+See
+.Xr fd (4)
+and
+.Xr mount_fdesc (8)
+for more information.
+.Sh 4 "POSIX terminal driver changes"
+.PP
+The \*(4B system uses the IEEE P1003.1 (POSIX.1) terminal interface
+rather than the previous \*(Bs terminal interface.
+The terminal driver is similar to the System V terminal driver
+with the addition of the necessary extensions to get the
+functionality previously available in the \*(Ps terminal driver.
+Both the old
+.Xr ioctl
+calls and old options to
+.Xr stty (1)
+are emulated.
+This emulation is expected to be unavailable in many vendors' releases,
+so conversion to the new interface is encouraged.
+.PP
+\*(4B also adds the IEEE Std1003.1 job control interface,
+that is similar to the \*(Ps job control interface,
+but adds a security model that was missing in the
+\*(Ps job control implementation.
+A new system call,
+.Fn setsid ,
+creates a job-control session consisting of a single process
+group with one member, the caller, that becomes a session leader.
+Only a session leader may acquire a controlling terminal.
+This is done explicitly via a
+.Sm TIOCSCTTY
+.Fn ioctl
+call, not implicitly by an
+.Fn open
+call.
+The call fails if the terminal is in use.
+Programs that allocate controlling terminals (or pseudo-terminals)
+require change to work in this environment.
+The version of
+.Xr xterm
+provided in the X11R5 release includes the necessary changes.
+New library routines are available for allocating and initializing
+pseudo-terminals and other terminals as controlling terminal; see
+.Pn /usr/src/lib/libutil/pty.c
+and
+.Pn /usr/src/lib/libutil/login_tty.c .
+.PP
+The POSIX job control model formalizes the previous conventions
+used in setting up a process group.
+Unfortunately, this requires that changes be made in a defined order
+and with some synchronization that were not necessary in the past.
+Older job control shells (csh, ksh) will generally not operate correctly
+with the new system.
+.PP
+Most of the other kernel interfaces have been changed to correspond
+with the POSIX.1 interface, although that work is not complete.
+See the relevant manual pages and the IEEE POSIX standard.
+.Sh 4 "Native operating system compatibility"
+.PP
+Both the HP300 and SPARC ports feature the ability to run binaries
+built for the native operating system (HP-UX or SunOS) by emulating
+their system calls.
+Building an HP300 kernel with the HPUXCOMPAT and COMPAT_OHPUX options
+or a SPARC kernel with the COMPAT_SUNOS option will enable this feature
+(on by default in the generic kernel provided in the root filesystem image).
+Though this native operating system compatibility was provided by the
+developers as needed for their purposes and is by no means complete,
+it is complete enough to run several non-trivial applications including
+those that require HP-UX or SunOS shared libraries.
+For example, the vendor supplied X11 server and windowing environment
+can be used on both the HP300 and SPARC.
+.PP
+It is important to remember that merely copying over a native binary
+and executing it (or executing it directly across NFS) does not imply
+that it will run.
+All but the most trivial of applications are likely to require access
+to auxiliary files that do not exist under \*(4B (e.g.
+.Pn /etc/ld.so.cache )
+or have a slightly different format (e.g.
+.Pn /etc/passwd ).
+However, by using system call tracing and
+through creative use of symlinks,
+many problems can be tracked down and corrected.
+.PP
+The DECstation port also has code for ULTRIX emulation
+(kernel option ULTRIXCOMPAT, not compiled into the generic kernel)
+but it was used primarily for initially bootstrapping the port and
+has not been used since.
+Hence, some work may be required to make it generally useful.
+.Sh 3 "Changes to the utilities"
+.PP
+We have been tracking the IEEE Std1003.2 shell and utility work
+and have included prototypes of many of the proposed utilities
+based on draft 12 of the POSIX.2 Shell and Utilities document.
+Because most of the traditional utilities have been replaced
+with implementations conformant to the POSIX standards,
+you should realize that the utility software may not be as stable,
+reliable or well documented as in traditional Berkeley releases.
+In particular, almost the entire manual suite has been rewritten to
+reflect the POSIX defined interfaces, and in some instances
+it does not correctly reflect the current state of the software.
+It is also worth noting that, in rewriting this software, we have generally
+been rewarded with significant performance improvements.
+Most of the libraries and header files have been converted
+to be compliant with ANSI C.
+The shipped compiler (gcc) is a superset of ANSI C,
+but supports traditional C as a command-line option.
+The system libraries and utilities all compile
+with either ANSI or traditional C.
+.Sh 4 "Make and Makefiles"
+.PP
+This release uses a completely new version of the
+.Xr make
+program derived from the
+.Xr pmake
+program developed by the Sprite project at Berkeley.
+It supports existing makefiles, although certain incorrect makefiles
+may fail.
+The makefiles for the \*(4B sources make extensive use of the new
+facilities, especially conditionals and file inclusion, and are thus
+completely incompatible with older versions of
+.Xr make
+(but nearly all the makefiles are now trivial!).
+The standard include files for
+.Xr make
+are in
+.Pn /usr/share/mk .
+There is a
+.Pn bsd.README
+file in
+.Pn /usr/src/share/mk .
+.PP
+Another global change supported by the new
+.Xr make
+is designed to allow multiple architectures to share a copy of the sources.
+If a subdirectory named
+.Pn obj
+is present in the current directory,
+.Xr make
+descends into that directory and creates all object and other files there.
+We use this by building a directory hierarchy in
+.Pn /var/obj
+that parallels
+.Pn /usr/src .
+We then create the
+.Pn obj
+subdirectories in
+.Pn /usr/src
+as symbolic links to the corresponding directories in
+.Pn /var/obj .
+(This step is automated.
+The command ``make obj'' in
+.Pn /usr/src
+builds both the local symlink and the shadow directory,
+using
+.Pn /usr/obj ,
+that may be a symbolic link, as the root of the shadow tree.
+The use of
+.Pn /usr/obj
+is for historic reasons only, and the system make configuration files in
+.Pn /usr/share/mk
+can trivially be modified to use
+.Pn /var/obj
+instead.)
+We have one
+.Pn /var/obj
+hierarchy on the local system, and another on each
+system that shares the source filesystem.
+All the sources in
+.Pn /usr/src
+except for
+.Pn /usr/src/contrib
+and portions of
+.Pn /usr/src/old
+have been converted to use the new make and
+.Pn obj
+subdirectories;
+this change allows compilation for multiple
+architectures from the same source tree
+(that may be mounted read-only).
+.Sh 4 "Kerberos"
+.PP
+The Kerberos authentication server from MIT (version 4)
+is included in this release.
+See
+.Xr kerberos (1)
+for a general, if MIT-specific, introduction.
+If it is configured,
+.Xr login (1),
+.Xr passwd (1),
+.Xr rlogin (1)
+and
+.Xr rsh (1)
+will all begin to use it automatically.
+The file
+.Pn /etc/kerberosIV/README
+describes the configuration.
+Each system needs the file
+.Pn /etc/kerberosIV/krb.conf
+to set its realm and local servers,
+and a private key stored in
+.Pn /etc/kerberosIV/srvtab
+(see
+.Xr ext_srvtab (8)).
+The Kerberos server should be set up on a single, physically secure,
+server machine.
+Users and hosts may be added to the server database manually with
+.Xr kdb_edit (8),
+or users on authorized hosts can add themselves and a Kerberos
+password after verification of their ``local'' (passwd-file) password
+using the
+.Xr register (1)
+program.
+.PP
+Note that by default the password-changing program
+.Xr passwd (1)
+changes the Kerberos password, that must exist.
+The
+.Li \-l
+option to
+.Xr passwd (1)
+changes the ``local'' password if one exists.
+.PP
+Note that Version 5 of Kerberos will be released soon;
+Version 4 should probably be replaced at that time.
+.Sh 4 "Timezone support"
+.PP
+The timezone conversion code in the C library uses data files installed in
+.Pn /usr/share/zoneinfo
+to convert from ``GMT'' to various timezones. The data file for the default
+timezone for the system should be copied to
+.Pn /etc/localtime .
+Other timezones can be selected by setting the TZ environment variable.
+.PP
+The data files initially installed in
+.Pn /usr/share/zoneinfo
+include corrections for leap seconds since the beginning of 1970.
+Thus, they assume that the
+kernel will increment the time at a constant rate during a leap second;
+that is, time just keeps on ticking. The conversion routines will then
+name a leap second 23:59:60. For purists, this effectively means that
+the kernel maintains TAI (International Atomic Time) rather than UTC
+(Coordinated Universal Time, aka GMT).
+.PP
+For systems that run current NTP (Network Time Protocol) implementations
+or that wish to conform to the letter of the POSIX.1 law, it is possible
+to rebuild the timezone data files so that leap seconds are not counted.
+(NTP causes the time to jump over a leap second, and POSIX effectively
+requires the clock to be reset by hand when a leap second occurs.
+In this mode, the kernel effectively runs UTC rather than TAI.)
+.PP
+The data files without leap second information
+are constructed from the source directory,
+.Pn /usr/src/share/zoneinfo .
+Change the variable REDO in Makefile
+from ``right'' to ``posix'', and then do
+.DS
+make obj (if necessary)
+make
+make install
+.DE
+.PP
+You will then need to copy the correct default zone file to
+.Pn /etc/localtime ,
+as the old one would still have used leap seconds, and because the Makefile
+installs a default
+.Pn /etc/localtime
+each time ``make install'' is done.
+.PP
+It is possible to install both sets of timezone data files. This results
+in subdirectories
+.Pn /usr/share/zoneinfo/right
+and
+.Pn /usr/share/zoneinfo/posix .
+Each contains a complete set of zone files.
+See
+.Pn /usr/src/share/zoneinfo/Makefile
+for details.
+.Sh 4 "Additions and changes to the libraries"
+.PP
+Notable additions to the libraries include functions to traverse a
+filesystem hierarchy, database interfaces to btree and hashing functions,
+a new, faster implementation of stdio, and radix and merge sort
+functions.
+.PP
+The
+.Xr fts (3)
+functions will do either physical or logical traversal of
+a file hierarchy as well as handle essentially infinite depth
+filesystems and filesystems with cycles.
+All the utilities in \*(4B which traverse file hierarchies
+have been converted to use
+.Xr fts (3).
+The conversion has always resulted in a significant performance
+gain, often of four or five to one in system time.
+.PP
+The
+.Xr dbopen (3)
+functions are intended to be a family of database access methods.
+Currently, they consist of
+.Xr hash (3),
+an extensible, dynamic hashing scheme,
+.Xr btree (3),
+a sorted, balanced tree structure (B+trees), and
+.Xr recno (3),
+a flat-file interface for fixed or variable length records
+referenced by logical record number.
+Each of the access methods stores associated key/data pairs and
+uses the same record oriented interface for access.
+.PP
+The
+.Xr qsort (3)
+function has been rewritten for additional performance.
+In addition, three new types of sorting functions,
+.Xr heapsort (3),
+.Xr mergesort (3)
+and
+.Xr radixsort (3)
+have been added to the system.
+The
+.Xr mergesort
+function is optimized for data with pre-existing order,
+in which case it usually significantly outperforms
+.Xr qsort .
+The
+.Xr radixsort (3)
+functions are variants of most-significant-byte radix sorting.
+They take time linear in the number of bytes to be
+sorted, usually significantly outperforming
+.Xr qsort
+on data that can be sorted in this fashion.
+An implementation of the POSIX 1003.2 standard
+.Xr sort (1),
+based on
+.Xr radixsort ,
+is included in
+.Pn /usr/src/contrib/sort .
+.PP
+Some additional comments about the \*(4B C library:
+.IP \(bu
+The floating point support in the C library has been replaced
+and is now accurate.
+.IP \(bu
+The C functions specified by ANSI C, POSIX 1003.1 and
+1003.2 are now part of the C library.
+This includes support for file name matching, shell globbing
+and both basic and extended regular expressions.
+.IP \(bu
+ANSI C multibyte and wide character support has been integrated.
+The rune functionality from the Bell Labs' Plan 9 system is provided
+as well.
+.IP \(bu
+The
+.Xr termcap (3)
+functions have been generalized and replaced with a general
+purpose interface named
+.Xr getcap (3).
+.IP \(bu
+The
+.Xr stdio (3)
+routines have been replaced, and are usually much faster.
+In addition, the
+.Xr funopen (3)
+interface permits applications to provide their own I/O stream
+function support.
+.PP
+The
+.Xr curses (3)
+library has been largely rewritten.
+Important additional features include support for scrolling and
+.Xr termios (3).
+.PP
+An application front-end editing library, named libedit, has been
+added to the system.
+.PP
+A superset implementation of the SunOS kernel memory interface library,
+libkvm, has been integrated into the system.
+.PP
+.Sh 4 "Additions and changes to other utilities"
+.PP
+There are many new utilities, offering many new capabilities,
+in \*(4B.
+Skimming through the section 1 and section 8 manual pages is sure
+to be useful.
+The additions to the utility suite include greatly enhanced versions of
+programs that display system status information, implementations of
+various traditional tools described in the IEEE Std1003.2 standard,
+new tools not previously available on Berkeley UNIX systems,
+and many others.
+Also, with only a very few exceptions, all the utilities from
+\*(Ps that included proprietary source code have been replaced,
+and their \*(4B counterparts are freely redistributable.
+Normally, this replacement resulted in significant performance
+improvements and the increase of the limits imposed on data by
+the utility as well.
+.PP
+A summary of specific additions and changes is as follows:
+.TS
+lfC l.
+amd An auto-mounter implementation.
+ar Replacement of the historic archive format with a new one.
+awk Replaced by gawk; see /usr/src/old/awk for the historic version.
+bdes Utility implementing DES modes of operation described in FIPS PUB 81.
+calendar Addition of an interface for system calendars.
+cap_mkdb Utility for building hashed versions of termcap style databases.
+cc Replacement of pcc with gcc suite.
+chflags A utility for setting the per-file user and system flags.
+chfn An editor based replacement for changing user information.
+chpass An editor based replacement for changing user information.
+chsh An editor based replacement for changing user information.
+cksum The POSIX 1003.2 checksum utility; compatible with sum.
+column A columnar text formatting utility.
+cp POSIX 1003.2 compatible, able to copy special files.
+csh Freely redistributable and 8-bit clean.
+date User specified formats added.
+dd New EBCDIC conversion tables, major performance improvements.
+dev_mkdb Hashed interface to devices.
+dm Dungeon master.
+find Several new options and primaries, major performance improvements.
+fstat Utility displaying information on files open on the system.
+ftpd Connection logging added.
+hexdump A binary dump utility, superseding od.
+id The POSIX 1003.2 user identification utility.
+inetd Tcpmux added.
+jot A text formatting utility.
+kdump A system-call tracing facility.
+ktrace A system-call tracing facility.
+kvm_mkdb Hashed interface to the kernel name list.
+lam A text formatting utility.
+lex A new, freely redistributable, significantly faster version.
+locate A database of the system files, by name, constructed weekly.
+logname The POSIX 1003.2 user identification utility.
+mail.local New local mail delivery agent, replacing mail.
+make Replaced with a new, more powerful make, supporting include files.
+man Added support for man page location configuration.
+mkdep A new utility for generating make dependency lists.
+mkfifo The POSIX 1003.2 FIFO creation utility.
+mtree A new utility for mapping file hierarchies to a file.
+nfsstat An NFS statistics utility.
+nvi A freely redistributable replacement for the ex/vi editors.
+pax The POSIX 1003.2 replacement for cpio and tar.
+printf The POSIX 1003.2 replacement for echo.
+roff Replaced by groff; see /usr/src/old/roff for the historic versions.
+rs New utility for text formatting.
+shar An archive building utility.
+sysctl MIB-style interface to system state.
+tcopy Fast tape-to-tape copying and verification.
+touch Time and file reference specifications.
+tput The POSIX 1003.2 terminal display utility.
+tr Addition of character classes.
+uname The POSIX 1003.2 system identification utility.
+vis A filter for converting and displaying non-printable characters.
+xargs The POSIX 1003.2 argument list constructor utility.
+yacc A new, freely redistributable, significantly faster version.
+.TE
+.PP
+The new versions of
+.Xr lex (1)
+(``flex'') and
+.Xr yacc (1)
+(``zoo'') should be installed early on if attempting to
+cross-compile \*(4B on another system.
+Note that the new
+.Xr lex
+program is not completely backward compatible with historic versions of
+.Xr lex ,
+although it is believed that all documented features are supported.
+.PP
+The
+.Xr find
+utility has two new options that are important to be aware of if you
+intend to use NFS.
+The ``fstype'' and ``prune'' options can be used together to prevent
+find from crossing NFS mount points.
+See
+.Pn /etc/daily
+for an example of their use.
+.Sh 2 "Hints on converting from \*(Ps to \*(4B"
+.PP
+This section summarizes changes between
+\*(Ps and \*(4B that are likely to
+cause difficulty in doing the conversion.
+It does not include changes in the network;
+see section 5 for information on setting up the network.
+.PP
+Since the stat st_size field is now 64 bits instead of 32,
+doing something like:
+.DS
+.ft CW
+foo(st.st_size);
+.DE
+and then (improperly) defining foo with an ``int'' or ``long'' parameter:
+.DS
+.ft CW
+foo(size)
+ int size;
+{
+ ...
+}
+.DE
+will fail miserably (well, it might work on a little endian machine).
+This problem showed up in
+.Xr emacs (1)
+as well as several other programs.
+A related problem is improperly casting (or failing to cast)
+the second argument to
+.Xr lseek (2),
+.Xr truncate (2),
+or
+.Xr ftruncate (2)
+ala:
+.DS
+.ft CW
+lseek(fd, (long)off, 0);
+.DE
+or
+.DS
+.ft CW
+lseek(fd, 0, 0);
+.DE
+The best solution is to include
+.Pn <unistd.h>
+which has prototypes that catch these types of errors.
+.PP
+Determining the ``namelen'' parameter for a
+.Xr connect (2)
+call on a unix domain socket should use the ``SUN_LEN'' macro from
+.Pn <sys/un.h> .
+One old way that was used:
+.DS
+.ft CW
+addrlen = strlen(unaddr.sun_path) + sizeof(unaddr.sun_family);
+.DE
+no longer works as there is an additional
+.Pn sun_len
+field.
+.PP
+The kernel's limit on the number of open files has been
+increased from 20 to 64.
+It is now possible to change this limit almost arbitrarily.
+The standard I/O library
+autoconfigures to the kernel limit.
+Note that file (``_iob'') entries may be allocated by
+.Xr malloc
+from
+.Xr fopen ;
+this allocation has been known to cause problems with programs
+that use their own memory allocators.
+Memory allocation does not occur until after 20 files have been opened
+by the standard I/O library.
+.PP
+.Xr Select
+can be used with more than 32 descriptors
+by using arrays of \fBint\fPs for the bit fields rather than single \fBint\fPs.
+Programs that used
+.Xr getdtablesize
+as their first argument to
+.Xr select
+will no longer work correctly.
+Usually the program can be modified to correctly specify the number
+of bits in an \fBint\fP.
+Alternatively the program can be modified to use an array of \fBint\fPs.
+There is a set of macros available in
+.Pn <sys/types.h>
+to simplify this.
+See
+.Xr select (2).
+.PP
+Old core files will not be intelligible to the current debuggers
+because of numerous changes to the user structure
+and because the kernel stack has been enlarged.
+The
+.Xr a.out
+header that was in the user structure is no longer present.
+Locally-written debuggers that try to check the magic number
+will need to be changed.
+.PP
+Files may not be deleted from directories having the ``sticky'' (ISVTX) bit
+set in their modes
+except by the owner of the file or of the directory, or by the superuser.
+This is primarily to protect users' files in publicly-writable directories
+such as
+.Pn /tmp
+and
+.Pn /var/tmp .
+All publicly-writable directories should have their ``sticky'' bits set
+with ``chmod +t''.
+.PP
+The following two sections contain additional notes about
+changes in \*(4B that affect the installation of local files;
+be sure to read them as well.
diff --git a/share/doc/smm/01.setup/4.t b/share/doc/smm/01.setup/4.t
new file mode 100644
index 0000000..fee3fc2
--- /dev/null
+++ b/share/doc/smm/01.setup/4.t
@@ -0,0 +1,713 @@
+.\" Copyright (c) 1980, 1986, 1988 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 8.1 (Berkeley) 7/29/93
+.\"
+.ds LH "Installing/Operating \*(4B
+.ds CF \*(Dy
+.ds RH "System setup
+.Sh 1 "System setup"
+.PP
+This section describes procedures used to set up a \*(4B UNIX system.
+These procedures are used when a system is first installed
+or when the system configuration changes. Procedures for normal
+system operation are described in the next section.
+.Sh 2 "Kernel configuration"
+.PP
+This section briefly describes the layout of the kernel code and
+how files for devices are made.
+For a full discussion of configuring
+and building system images, consult the document ``Building
+4.3BSD UNIX Systems with Config'' (SMM:2).
+.Sh 3 "Kernel organization"
+.PP
+As distributed, the kernel source is in a
+separate tar image. The source may be physically
+located anywhere within any filesystem so long as
+a symbolic link to the location is created for the file
+.Pn /sys
+(many files in
+.Pn /usr/include
+are normally symbolic links relative to
+.Pn /sys ).
+In further discussions of the system source all path names
+will be given relative to
+.Pn /sys .
+.LP
+The kernel is made up of several large generic parts:
+.TS
+l l l.
+sys main kernel header files
+kern kernel functions broken down as follows
+ init system startup, syscall dispatching, entry points
+ kern scheduling, descriptor handling and generic I/O
+ sys process management, signals
+ tty terminal handling and job control
+ vfs filesystem management
+ uipc interprocess communication (sockets)
+ subr miscellaneous support routines
+vm virtual memory management
+ufs local filesystems broken down as follows
+ ufs common local filesystem routines
+ ffs fast filesystem
+ lfs log-based filesystem
+ mfs memory based filesystem
+nfs Sun-compatible network filesystem
+miscfs miscellaneous filesystems broken down as follows
+ deadfs where rejected vnodes go to die
+ fdesc access to per-process file descriptors
+ fifofs IEEE Std1003.1 FIFOs
+ kernfs filesystem access to kernel data structures
+ lofs loopback filesystem
+ nullfs another loopback filesystem
+ portal associate processes with filesystem locations
+ specfs device special files
+ umapfs provide alternate uid/gid mappings
+dev generic device drivers (SCSI, vnode, concatenated disk)
+.TE
+.LP
+The networking code is organized by protocol
+.TS
+l l.
+net routing and generic interface drivers
+netinet Internet protocols (TCP, UDP, IP, etc)
+netiso ISO protocols (TP-4, CLNP, CLTP, etc)
+netns Xerox network systems protocols (IDP, SPP, etc)
+netx25 CCITT X.25 protocols (X.25 Packet Level, HDLC/LAPB)
+.TE
+.LP
+A separate subdirectory is provided for each machine architecture
+.TS
+l l.
+hp300 HP 9000/300 series of Motorola 68000-based machines
+hp code common to both HP 68k and (non-existent) PA-RISC ports
+i386 Intel 386/486-based PC machines
+luna68k Omron 68000-based workstations
+news3400 Sony News MIPS-based workstations
+pmax Digital 3100/5000 MIPS-based workstations
+sparc Sun Microsystems SPARCstation 1, 1+, and 2
+tahoe (deprecated) CCI Power 6-series machines
+vax (deprecated) Digital VAX machines
+.TE
+.LP
+Each machine directory is subdivided by function;
+for example the hp300 directory contains
+.TS
+l l.
+include exported machine-dependent header files
+hp300 machine-dependent support code and private header files
+dev device drivers
+conf configuration files
+stand machine-dependent standalone code
+.TE
+.LP
+Other kernel related directories
+.TS
+l l.
+compile area to compile kernels
+conf machine-independent configuration files
+stand machine-independent standalone code
+.TE
+.Sh 3 "Devices and device drivers"
+.PP
+Devices supported by UNIX are implemented in the kernel
+by drivers whose source is kept in
+.Pn /sys/<architecture>/dev .
+These drivers are loaded
+into the system when included in a cpu specific configuration file
+kept in the conf directory. Devices are accessed through special
+files in the filesystem, made by the
+.Xr mknod (8)
+program and normally kept in the
+.Pn /dev
+directory.
+For all the devices supported by the distribution system, the
+files in
+.Pn /dev
+are created by the
+.Pn /dev/MAKEDEV
+shell script.
+.PP
+Determine the set of devices that you have and create a new
+.Pn /dev
+directory by running the MAKEDEV script.
+First create a new directory
+.Pn /newdev ,
+copy MAKEDEV into it, edit the file MAKEDEV.local
+to provide an entry for local needs, and run it to generate a
+.Pn /newdev
+directory.
+For instance,
+.DS
+\fB#\fP \fIcd /\fP
+\fB#\fP \fImkdir newdev\fP
+\fB#\fP \fIcp dev/MAKEDEV newdev/MAKEDEV\fP
+\fB#\fP \fIcd newdev\fP
+\fB#\fP \fIMAKEDEV \*(Dk0 pt0 std LOCAL\fP
+.DE
+Note the ``std'' argument causes standard devices such as
+.Pn /dev/console ,
+the machine console, to be created.
+.PP
+You can then do
+.DS
+\fB#\fP \fIcd /\fP
+\fB#\fP \fImv dev olddev ; mv newdev dev\fP
+\fB#\fP \fIsync\fP
+.DE
+to install the new device directory.
+.Sh 3 "Building new system images"
+.PP
+The kernel configuration of each UNIX system is described by
+a single configuration file, stored in the
+.Pn /sys/<architecture>/conf
+directory.
+To learn about the format of this file and the procedure used
+to build system images,
+start by reading ``Building 4.3BSD UNIX Systems with Config'' (SMM:2),
+look at the manual pages in section 4
+of the UNIX manual for the devices you have,
+and look at the sample configuration files in the
+.Pn /sys/<architecture>/conf
+directory.
+.PP
+The configured system image
+.Pn vmunix
+should be copied to the root, and then booted to try it out.
+It is best to name it
+.Pn /newvmunix
+so as not to destroy the working system until you are sure it does work:
+.DS
+\fB#\fP \fIcp vmunix /newvmunix\fP
+\fB#\fP \fIsync\fP
+.DE
+It is also a good idea to keep the previous system around under some other
+name. In particular, we recommend that you save the generic distribution
+version of the system permanently as
+.Pn /genvmunix
+for use in emergencies.
+To boot the new version of the system you should follow the
+bootstrap procedures outlined in section 6.1.
+After having booted and tested the new system, it should be installed as
+.Pn /vmunix
+before going into multiuser operation.
+A systematic scheme for numbering and saving old versions
+of the system may be useful.
+.Sh 2 "Configuring terminals"
+.PP
+If UNIX is to support simultaneous
+access from directly-connected terminals other than the console,
+the file
+.Pn /etc/ttys
+(see
+.Xr ttys (5))
+must be edited.
+.PP
+To add a new terminal device, be sure the device is configured into the system
+and that the special files for the device have been made by
+.Pn /dev/MAKEDEV .
+Then, enable the appropriate lines of
+.Pn /etc/ttys
+by setting the ``status''
+field to \fBon\fP (or add new lines).
+Note that lines in
+.Pn /etc/ttys
+are one-for-one with entries in the file of current users
+(see
+.Pn /var/run/utmp ),
+and therefore it is best to make changes
+while running in single-user mode
+and to add all the entries for a new device at once.
+.PP
+Each line in the
+.Pn /etc/ttys
+file is broken into four tab separated
+fields (comments are shown by a `#' character and extend to
+the end of the line). For each terminal line the four fields
+are:
+the device (without a leading
+.Pn /dev ),
+the program
+.Pn /sbin/init
+should start up to service the line
+(or \fBnone\fP if the line is to be left alone),
+the terminal type (found in
+.Pn /usr/share/misc/termcap ),
+and optional status information describing if the terminal is
+enabled or not and if it is ``secure'' (i.e. the super user should
+be allowed to login on the line).
+If the console is marked as ``insecure'',
+then the root password is required to bring the machine up single-user.
+All fields are character strings
+with entries requiring embedded white space enclosed in double
+quotes.
+Thus a newly added terminal
+.Pn /dev/tty00
+could be added as
+.DS
+tty00 "/usr/libexec/getty std.9600" vt100 on secure # mike's office
+.DE
+The std.9600 parameter provided to
+.Pn /usr/libexec/getty
+is used in searching the file
+.Pn /etc/gettytab ;
+it specifies a terminal's characteristics (such as baud rate).
+To make custom terminal types, consult
+.Xr gettytab (5)
+before modifying
+.Pn /etc/gettytab .
+.PP
+Dialup terminals should be wired so that carrier is asserted only when the
+phone line is dialed up.
+For non-dialup terminals, from which modem control is not available,
+you must wire back the signals so that
+the carrier appears to always be present. For further details,
+find your terminal driver in section 4 of the manual.
+.PP
+For network terminals (i.e. pseudo terminals), no program should
+be started up on the lines. Thus, the normal entry in
+.Pn /etc/ttys
+would look like
+.DS
+ttyp0 none network
+.DE
+(Note, the fourth field is not needed here.)
+.PP
+When the system is running multi-user, all terminals that are listed in
+.Pn /etc/ttys
+as \fBon\fP have their line enabled.
+If, during normal operations, you wish
+to disable a terminal line, you can edit the file
+.Pn /etc/ttys
+to change the terminal's status to \fBoff\fP and
+then send a hangup signal to the
+.Xr init
+process, by doing
+.DS
+\fB#\fP \fIkill \-1 1\fP
+.DE
+Terminals can similarly be enabled by changing the status field
+from \fBoff\fP to \fBon\fP and sending a hangup signal to
+.Xr init .
+.PP
+Note that if a special file is inaccessible when
+.Xr init
+tries to create a process for it,
+.Xr init
+will log a message to the
+system error logging process (see
+.Xr syslogd (8))
+and try to reopen the terminal every minute, reprinting the warning
+message every 10 minutes. Messages of this sort are normally
+printed on the console, though other actions may occur depending
+on the configuration information found in
+.Pn /etc/syslog.conf .
+.PP
+Finally note that you should change the names of any dialup
+terminals to ttyd?
+where ? is in [0-9a-zA-Z], as some programs use this property of the
+names to determine if a terminal is a dialup.
+Shell commands to do this should be put in the
+.Pn /dev/MAKEDEV.local
+script.
+.PP
+While it is possible to use truly arbitrary strings for terminal names,
+the accounting and notably the
+.Xr ps (1)
+command make good use of the convention that tty names
+(by default, and also after dialups are named as suggested above)
+are distinct in the last 2 characters.
+Change this and you may be sorry later, as the heuristic
+.Xr ps (1)
+uses based on these conventions will then break down and
+.Xr ps
+will run MUCH slower.
+.Sh 2 "Adding users"
+.PP
+The procedure for adding a new user is described in
+.Xr adduser (8).
+You should add accounts for the initial user community, giving
+each a directory and a password, and putting users who will wish
+to share software in the same groups.
+.PP
+Several guest accounts have been provided on the distribution
+system; these accounts are for people at Berkeley,
+Bell Laboratories, and others
+who have done major work on UNIX in the past. You can delete these accounts,
+or leave them on the system if you expect that these people would have
+occasion to login as guests on your system.
+.Sh 2 "Site tailoring"
+.PP
+All programs that require the site's name, or some similar
+characteristic, obtain the information through system calls
+or from files located in
+.Pn /etc .
+Aside from parts of the
+system related to the network, to tailor the system to your
+site you must simply select a site name, then edit the file
+.DS
+/etc/netstart
+.DE
+The first lines in
+.Pn /etc/netstart
+use a variable to set the hostname,
+.DS
+hostname=\fImysitename\fP
+/bin/hostname $hostname
+.DE
+to define the value returned by the
+.Xr gethostname (2)
+system call. If you are running the name server, your site
+name should be your fully qualified domain name. Programs such as
+.Xr getty (8),
+.Xr mail (1),
+.Xr wall (1),
+and
+.Xr uucp (1)
+use this system call so that the binary images are site
+independent.
+.PP
+You will also need to edit
+.Pn /etc/netstart
+to do the network interface initialization using
+.Xr ifconfig (8).
+If you are not sure how to do this, see sections 5.1, 5.2, and 5.3.
+If you are not running a routing daemon and have
+more than one Ethernet in your environment
+you will need to set up a default route;
+see section 5.4 for details.
+Before bringing your system up multiuser,
+you should ensure that the networking is properly configured.
+The network is started by running
+.Pn /etc/netstart .
+Once started, you should test connectivity using
+.Xr ping (8).
+You should first test connectivity to yourself,
+then another host on your Ethernet,
+and finally a host on another Ethernet.
+The
+.Xr netstat (8)
+program can be used to inspect and debug
+your routes; see section 5.4.
+.Sh 2 "Setting up the line printer system"
+.PP
+The line printer system consists of at least
+the following files and commands:
+.DS
+.TS
+l l.
+/usr/bin/lpq spooling queue examination program
+/usr/bin/lprm program to delete jobs from a queue
+/usr/bin/lpr program to enter a job in a printer queue
+/etc/printcap printer configuration and capability database
+/usr/sbin/lpd line printer daemon, scans spooling queues
+/usr/sbin/lpc line printer control program
+/etc/hosts.lpd list of host allowed to use the printers
+.TE
+.DE
+.PP
+The file
+.Pn /etc/printcap
+is a master database describing line
+printers directly attached to a machine and, also, printers
+accessible across a network. The manual page
+.Xr printcap (5)
+describes the format of this database and also
+shows the default values for such things as the directory
+in which spooling is performed. The line printer system handles
+multiple printers, multiple spooling queues, local and remote
+printers, and also printers attached via serial lines that require
+line initialization such as the baud rate. Raster output devices
+such as a Varian or Versatec, and laser printers such as an Imagen,
+are also supported by the line printer system.
+.PP
+Remote spooling via the network is handled with two spooling
+queues, one on the local machine and one on the remote machine.
+When a remote printer job is started with
+.Xr lpr ,
+the job is queued locally and a daemon process created to oversee the
+transfer of the job to the remote machine. If the destination
+machine is unreachable, the job will remain queued until it is
+possible to transfer the files to the spooling queue on the
+remote machine. The
+.Xr lpq
+program shows the contents of spool
+queues on both the local and remote machines.
+.PP
+To configure your line printers, consult the printcap manual page
+and the accompanying document, ``4.3BSD Line Printer Spooler Manual'' (SMM:7).
+A call to the
+.Xr lpd
+program should be present in
+.Pn /etc/rc .
+.Sh 2 "Setting up the mail system"
+.PP
+The mail system consists of the following commands:
+.DS
+.TS
+l l.
+/usr/bin/mail UCB mail program, described in \fImail\fP\|(1)
+/usr/sbin/sendmail mail routing program
+/var/spool/mail mail spooling directory
+/var/spool/secretmail secure mail directory
+/usr/bin/xsend secure mail sender
+/usr/bin/xget secure mail receiver
+/etc/aliases mail forwarding information
+/usr/bin/newaliases command to rebuild binary forwarding database
+/usr/bin/biff mail notification enabler
+/usr/libexec/comsat mail notification daemon
+.TE
+.DE
+Mail is normally sent and received using the
+.Xr mail (1)
+command (found in
+.Pn /usr/bin/mail ),
+which provides a front-end to edit the messages sent
+and received, and passes the messages to
+.Xr sendmail (8)
+for routing.
+The routing algorithm uses knowledge of the network name syntax,
+aliasing and forwarding information, and network topology, as
+defined in the configuration file
+.Pn /usr/lib/sendmail.cf ,
+to process each piece of mail.
+Local mail is delivered by giving it to the program
+.Pn /usr/libexec/mail.local
+that adds it to the mailboxes in the directory
+.Pn /var/spool/mail/<username> ,
+using a locking protocol to avoid problems with simultaneous updates.
+After the mail is delivered, the local mail delivery daemon
+.Pn /usr/libexec/comsat
+is notified, which in turn notifies users who have issued a
+``\fIbiff\fP y'' command that mail has arrived.
+.PP
+Mail queued in the directory
+.Pn /var/spool/mail
+is normally readable only by the recipient.
+To send mail that is secure against perusal
+(except by a code-breaker) you should use the secret mail facility,
+which encrypts the mail.
+.PP
+To set up the mail facility you should read the instructions in the
+file READ_ME in the directory
+.Pn /usr/src/usr.sbin/sendmail
+and then adjust the necessary configuration files.
+You should also set up the file
+.Pn /etc/aliases
+for your installation, creating mail groups as appropriate.
+For more information see
+``Sendmail Installation and Operation Guide'' (SMM:8) and
+``Sendmail \- An Internetwork Mail Router'' (SMM:9).
+.Sh 3 "Setting up a UUCP connection"
+.LP
+The version of
+.Xr uucp
+included in \*(4B has the following features:
+.IP \(bu 3
+support for many auto call units and dialers
+in addition to the DEC DN11,
+.IP \(bu 3
+breakup of the spooling area into multiple subdirectories,
+.IP \(bu 3
+addition of an
+.Pn L.cmds
+file to control the set
+of commands that may be executed by a remote site,
+.IP \(bu 3
+enhanced ``expect-send'' sequence capabilities when
+logging in to a remote site,
+.IP \(bu 3
+new commands to be used in polling sites and
+obtaining snap shots of
+.Xr uucp
+activity,
+.IP \(bu 3
+additional protocols for different communication media.
+.LP
+This section gives a brief overview of
+.Xr uucp
+and points out the most important steps in its installation.
+.PP
+To connect two UNIX machines with a
+.Xr uucp
+network link using modems,
+one site must have an automatic call unit
+and the other must have a dialup port.
+It is better if both sites have both.
+.PP
+You should first read the paper in the UNIX System Manager's Manual:
+``Uucp Implementation Description'' (SMM:14).
+It describes in detail the file formats and conventions,
+and will give you a little context.
+In addition,
+the document ``setup.tblms'',
+located in the directory
+.Pn /usr/src/usr.bin/uucp/UUAIDS ,
+may be of use in tailoring the software to your needs.
+.PP
+The
+.Xr uucp
+support is located in three major directories:
+.Pn /usr/bin ,
+.Pn /usr/lib/uucp ,
+and
+.Pn /var/spool/uucp .
+User commands are kept in
+.Pn /usr/bin ,
+operational commands in
+.Pn /usr/lib/uucp ,
+and
+.Pn /var/spool/uucp
+is used as a spooling area.
+The commands in
+.Pn /usr/bin
+are:
+.DS
+.TS
+l l.
+/usr/bin/uucp file-copy command
+/usr/bin/uux remote execution command
+/usr/bin/uusend binary file transfer using mail
+/usr/bin/uuencode binary file encoder (for \fIuusend\fP)
+/usr/bin/uudecode binary file decoder (for \fIuusend\fP)
+/usr/bin/uulog scans session log files
+/usr/bin/uusnap gives a snap-shot of \fIuucp\fP activity
+/usr/bin/uupoll polls remote system until an answer is received
+/usr/bin/uuname prints a list of known uucp hosts
+/usr/bin/uuq gives information about the queue
+.TE
+.DE
+The important files and commands in
+.Pn /usr/lib/uucp
+are:
+.DS
+.TS
+l l.
+/usr/lib/uucp/L-devices list of dialers and hard-wired lines
+/usr/lib/uucp/L-dialcodes dialcode abbreviations
+/usr/lib/uucp/L.aliases hostname aliases
+/usr/lib/uucp/L.cmds commands remote sites may execute
+/usr/lib/uucp/L.sys systems to communicate with, how to connect, and when
+/usr/lib/uucp/SEQF sequence numbering control file
+/usr/lib/uucp/USERFILE remote site pathname access specifications
+/usr/lib/uucp/uucico \fIuucp\fP protocol daemon
+/usr/lib/uucp/uuclean cleans up garbage files in spool area
+/usr/lib/uucp/uuxqt \fIuucp\fP remote execution server
+.TE
+.DE
+while the spooling area contains the following important files and directories:
+.DS
+.TS
+l l.
+/var/spool/uucp/C. directory for command, ``C.'' files
+/var/spool/uucp/D. directory for data, ``D.'', files
+/var/spool/uucp/X. directory for command execution, ``X.'', files
+/var/spool/uucp/D.\fImachine\fP directory for local ``D.'' files
+/var/spool/uucp/D.\fImachine\fPX directory for local ``X.'' files
+/var/spool/uucp/TM. directory for temporary, ``TM.'', files
+/var/spool/uucp/LOGFILE log file of \fIuucp\fP activity
+/var/spool/uucp/SYSLOG log file of \fIuucp\fP file transfers
+.TE
+.DE
+.PP
+To install
+.Xr uucp
+on your system,
+start by selecting a site name
+(shorter than 14 characters).
+A
+.Xr uucp
+account must be created in the password file and a password set up.
+Then,
+create the appropriate spooling directories with mode 755
+and owned by user
+.Xr uucp ,
+group \fIdaemon\fP.
+.PP
+If you have an auto-call unit,
+the L.sys, L-dialcodes, and L-devices files should be created.
+The L.sys file should contain
+the phone numbers and login sequences
+required to establish a connection with a
+.Xr uucp
+daemon on another machine.
+For example, our L.sys file looks something like:
+.DS
+adiron Any ACU 1200 out0123456789- ogin-EOT-ogin uucp
+cbosg Never Slave 300
+cbosgd Never Slave 300
+chico Never Slave 1200 out2010123456
+.DE
+The first field is the name of a site,
+the second shows when the machine may be called,
+the third field specifies how the host is connected
+(through an ACU, a hard-wired line, etc.),
+then comes the phone number to use in connecting through an auto-call unit,
+and finally a login sequence.
+The phone number
+may contain common abbreviations that are defined in the L-dialcodes file.
+The device specification should refer to devices
+specified in the L-devices file.
+Listing only ACU causes the
+.Xr uucp
+daemon,
+.Xr uucico ,
+to search for any available auto-call unit in L-devices.
+Our L-dialcodes file is of the form:
+.DS
+ucb 2
+out 9%
+.DE
+while our L-devices file is:
+.DS
+ACU cul0 unused 1200 ventel
+.DE
+Refer to the README file in the
+.Xr uucp
+source directory for more information about installation.
+.PP
+As
+.Xr uucp
+operates it creates (and removes) many small
+files in the directories underneath
+.Pn /var/spool/uucp .
+Sometimes files are left undeleted;
+these are most easily purged with the
+.Xr uuclean
+program.
+The log files can grow without bound unless trimmed back;
+.Xr uulog
+maintains these files.
+Many useful aids in maintaining your
+.Xr uucp
+installation are included in a subdirectory UUAIDS beneath
+.Pn /usr/src/usr.bin/uucp .
+Peruse this directory and read the ``setup'' instructions also located there.
diff --git a/share/doc/smm/01.setup/5.t b/share/doc/smm/01.setup/5.t
new file mode 100644
index 0000000..10b86dd
--- /dev/null
+++ b/share/doc/smm/01.setup/5.t
@@ -0,0 +1,586 @@
+.\" Copyright (c) 1980, 1986, 1988, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)5.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds lq ``
+.ds rq ''
+.ds LH "Installing/Operating \*(4B
+.ds RH Network setup
+.ds CF \*(Dy
+.Sh 1 "Network setup"
+.PP
+\*(4B provides support for the standard Internet
+protocols IP, ICMP, TCP, and UDP. These protocols may be used
+on top of a variety of hardware devices ranging from
+serial lines to local area network controllers
+for the Ethernet. Network services are split between the
+kernel (communication protocols) and user programs (user
+services such as TELNET and FTP). This section describes
+how to configure your system to use the Internet networking support.
+\*(4B also supports the Xerox Network Systems (NS) protocols.
+IDP and SPP are implemented in the kernel,
+and other protocols such as Courier run at the user level.
+\*(4B provides some support for the ISO OSI protocols CLNP,
+TP4, and ESIS. User level processes
+complete the application protocols such as X.400 and X.500.
+.Sh 2 "System configuration"
+.PP
+To configure the kernel to include the Internet communication
+protocols, define the INET option.
+Xerox NS support is enabled with the NS option.
+ISO OSI support is enabled with the ISO option.
+In all cases, include the pseudo-devices
+``pty'' and ``loop'' in your machine's configuration
+file.
+The ``pty'' pseudo-device forces the pseudo terminal device driver
+to be configured into the system, see
+.Xr pty (4),
+while the ``loop'' pseudo-device forces inclusion of the software loopback
+interface driver.
+The loop driver is used in network testing
+and also by the error logging system.
+.PP
+If you are planning to use the Internet network facilities on a 10Mb/s
+Ethernet, the pseudo-device ``ether'' should also be included
+in the configuration; this forces inclusion of the Address Resolution
+Protocol module used in mapping between 48-bit Ethernet
+and 32-bit Internet addresses.
+.PP
+Before configuring the appropriate networking hardware, you should
+consult the manual pages in section 4 of the Programmer's Manual
+selecting the appropriate interfaces for your architecture.
+.PP
+All network interface drivers, including the loopback interface,
+require that their host address(es) be defined at boot time.
+This is done with
+.Xr ifconfig (8)
+commands included in the
+.Pn /etc/netstart
+file.
+Interfaces that are able to dynamically deduce the host
+part of an address may check that the host part of the address is correct.
+The manual page for each network interface
+describes the method used to establish a host's address.
+.Xr Ifconfig (8)
+can also be used to set options for the interface at boot time.
+Options are set independently for each interface, and
+apply to all packets sent using that interface.
+Alternatively, translations for hosts that do not implement ARP may be set in advance
+or ``published'' by a \*(4B host by use of the
+.Xr arp (8)
+command.
+Note that the use of trailer link-level encapsulation is now negotiated between \*(4B hosts
+using ARP,
+and it is thus no longer necessary to disable the use of trailers
+with
+.Xr ifconfig .
+.PP
+The OSI equivalent to ARP is ESIS (End System to Intermediate System Routing
+Protocol). Running this protocol is mandatory; however, one can manually add
+translations for machines that do not participate by use of the
+.Xr route (8)
+command.
+Additional information is provided in the manual page describing
+.Xr ESIS (4).
+.PP
+To use the pseudo terminals just configured, device
+entries must be created in the
+.Pn /dev
+directory. To create 32
+pseudo terminals (plenty, unless you have a heavy network load)
+execute the following commands.
+.DS
+\fB#\fP \fIcd /dev\fP
+\fB#\fP \fIMAKEDEV pty0 pty1\fP
+.DE
+More pseudo terminals may be made by specifying
+.Pn pty2 ,
+.Pn pty3 ,
+etc. The kernel normally includes support for 32 pseudo terminals
+unless the configuration file specifies a different number.
+Each pseudo terminal really consists of two files in
+.Pn /dev :
+a master and a slave. The master pseudo terminal file is named
+.Pn /dev/ptyp? ,
+while the slave side is
+.Pn /dev/ttyp? .
+Pseudo terminals are also used by several programs not related to the network.
+In addition to creating the pseudo terminals,
+be sure to install them in the
+.Pn /etc/ttys
+file (with a `none' in the second column so no
+.Xr getty
+is started).
+.Sh 2 "Local subnets"
+.PP
+In \*(4B the Internet support
+includes the notion of ``subnets''. This is a mechanism
+by which multiple local networks may appear as a single Internet
+network to off-site hosts. Subnetworks are useful because
+they allow a site to hide its local topology, requiring only a single
+route in external gateways;
+it also means that local network numbers may be locally administered.
+The standard describing this change in Internet addressing is RFC-950.
+.PP
+To set up local subnets one must first decide how the available
+address space (the Internet ``host part'' of the 32-bit address)
+is to be partitioned.
+Sites with a class A network
+number have a 24-bit host address space with which to work, sites with a
+class B network number have a 16-bit host address space, while sites with
+a class C network number have an 8-bit host address space\**.
+.FS
+If you are unfamiliar with the Internet addressing structure, consult
+``Address Mappings'', Internet RFC-796, J. Postel; available from
+the Internet Network Information Center at SRI.
+.FE
+To define local subnets you must steal some bits
+from the local host address space for use in extending the network
+portion of the Internet address. This reinterpretation of Internet
+addresses is done only for local networks; i.e. it is not visible
+to hosts off-site. For example, if your site has a class B network
+number, hosts on this network have an Internet address that contains
+the network number, 16 bits, and the host number, another
+16 bits. To define 254 local subnets, each
+possessing at most 255 hosts, 8 bits may be taken from the local part.
+(The use of subnets 0 and all-1's, 255 in this example, is discouraged
+to avoid confusion about broadcast addresses.)
+These new network
+numbers are then constructed by concatenating the original 16-bit network
+number with the extra 8 bits containing the local subnet number.
+.PP
+The existence of local subnets is communicated to the system at the time a
+network interface is configured with the
+.I netmask
+option to the
+.Xr ifconfig
+program. A ``network mask'' is specified to define the
+portion of the Internet address that is to be considered the network part
+for that network.
+This mask normally contains the bits corresponding to the standard
+network part as well as the portion of the local part
+that has been assigned to subnets.
+If no mask is specified when the address is set,
+it will be set according to the class of the network.
+For example, at Berkeley (class B network 128.32) 8 bits
+of the local part have been reserved for defining subnets;
+consequently the
+.Pn /etc/netstart
+file contains lines of the form
+.DS
+.ft CW
+/sbin/ifconfig le0 netmask 0xffffff00 128.32.1.7
+.DE
+This specifies that for interface ``le0'', the upper 24 bits of
+the Internet address should be used in calculating network numbers
+(netmask 0xffffff00), and the interface's Internet address is
+``128.32.1.7'' (host 7 on network 128.32.1). Hosts \fIm\fP on
+sub-network \fIn\fP of this network would then have addresses of
+the form ``128.32.\fIn\fP.\fIm\fP''; for example, host
+99 on network 129 would have an address ``128.32.129.99''.
+For hosts with multiple interfaces, the network mask should
+be set for each interface,
+although in practice only the mask of the first interface on each network
+is really used.
+.Sh 2 "Internet broadcast addresses"
+.PP
+The address defined as the broadcast address for Internet networks
+according to RFC-919 is the address with a host part of all 1's.
+The address used by 4.2BSD was the address with a host part of 0.
+\*(4B uses the standard broadcast address (all 1's) by default,
+but allows the broadcast address to be set (with
+.Xr ifconfig )
+for each interface.
+This allows networks consisting of both 4.2BSD, \*(Ps and \*(4B hosts
+to coexist while the upgrade process proceeds.
+In the presence of subnets, the broadcast address uses the subnet field
+as for normal host addresses, with the remaining host part set to 1's
+(or 0's, on a network that has not yet been converted).
+\*(4B hosts recognize and accept packets
+sent to the logical-network broadcast address as well as those sent
+to the subnet broadcast address, and when using an all-1's broadcast,
+also recognize and receive packets sent to host 0 as a broadcast.
+.Sh 2 "Routing"
+.PP
+If your environment allows access to networks not directly
+attached to your host you will need to set up routing information
+to allow packets to be properly routed. Two schemes are
+supported by the system. The first scheme
+employs a routing table management daemon.
+Optimally, you should use the routing daemon
+.Xr gated
+available from Cornell University.
+We use it on our systems and it works well,
+especially for multi-homed hosts using Serial Line IP (SLIP).
+Unfortunately, we were not able to obtain permission to
+include it on \*(4B.
+.PP
+If you do not wish to or cannot obtain
+.Xr gated ,
+the distribution does include
+.Xr routed (8)
+to maintain the system routing tables. The routing daemon
+uses a variant of the Xerox Routing Information Protocol
+to maintain up to date routing tables in a cluster of local
+area networks. By using the
+.Pn /etc/gateways
+file, the routing daemon can also be used to initialize static routes
+to distant networks (see the next section for further discussion).
+When the routing daemon is started up
+(usually from
+.Pn /etc/rc )
+it reads
+.Pn /etc/gateways
+if it exists and installs those routes defined there,
+then broadcasts on each local network
+to which the host is attached to find other instances of the routing
+daemon. If any responses are received, the routing daemons
+cooperate in maintaining a globally consistent view of routing
+in the local environment. This view can be extended to include
+remote sites also running the routing daemon by setting up suitable
+entries in
+.Pn /etc/gateways ;
+consult
+.Xr routed (8)
+for a more thorough discussion.
+.PP
+The second approach is to define a default or wildcard
+route to a smart
+gateway and depend on the gateway to provide ICMP routing
+redirect information to dynamically create a routing data
+base. This is done by adding an entry of the form
+.DS
+.ft CW
+/sbin/route add default \fIsmart-gateway\fP 1
+.DE
+to
+.Pn /etc/netstart ;
+see
+.Xr route (8)
+for more information. The default route
+will be used by the system as a ``last resort''
+in routing packets to their destination. Assuming the gateway
+to which packets are directed is able to generate the proper
+routing redirect messages, the system will then add routing
+table entries based on the information supplied. This approach
+has certain advantages over the routing daemon, but is
+unsuitable in an environment where there are only bridges (i.e.
+pseudo gateways that, for instance, do not generate routing
+redirect messages). Further, if the
+smart gateway goes down there is no alternative, save manual
+alteration of the routing table entry, to maintaining service.
+.PP
+The system always listens for, and processes, routing redirect
+information, so it is possible to combine both of the above
+facilities. For example, the routing table management process
+might be used to maintain up to date information about routes
+to geographically local networks, while employing the wildcard
+routing techniques for ``distant'' networks. The
+.Xr netstat (1)
+program may be used to display routing table contents as well
+as various routing oriented statistics. For example,
+.DS
+\fB#\fP \fInetstat \-r\fP
+.DE
+will display the contents of the routing tables, while
+.DS
+\fB#\fP \fInetstat \-r \-s\fP
+.DE
+will show the number of routing table entries dynamically
+created as a result of routing redirect messages, etc.
+.Sh 2 "Use of \*(4B machines as gateways"
+.PP
+Several changes have been made in \*(4B in the area of gateway support
+(or packet forwarding, if one prefers).
+A new configuration option, GATEWAY, is used when configuring
+a machine to be used as a gateway.
+This option increases the size of the routing hash tables in the kernel.
+Unless configured with that option,
+hosts with only a single non-loopback interface never attempt
+to forward packets or to respond with ICMP error messages to misdirected
+packets.
+This change reduces the problems that may occur when different hosts
+on a network disagree on the network number or broadcast address.
+Another change is that \*(4B machines that forward packets back through
+the same interface on which they arrived
+will send ICMP redirects to the source host if it is on the same network.
+This improves the interaction of \*(4B gateways with hosts that configure
+their routes via default gateways and redirects.
+The generation of redirects may be disabled with the configuration option
+IPSENDREDIRECTS=0 or while the system is running by using the command:
+.DS
+.ft CW
+sysctl -w net.inet.ip.redirect=0
+.DE
+in environments where it may cause difficulties.
+.Sh 2 "Network databases"
+.PP
+Several data files are used by the network library routines
+and server programs. Most of these files are host independent
+and updated only rarely.
+.br
+.ne 1i
+.TS
+lfC l l.
+File Manual reference Use
+_
+/etc/hosts \fIhosts\fP\|(5) local host names
+/etc/networks \fInetworks\fP\|(5) network names
+/etc/services \fIservices\fP\|(5) list of known services
+/etc/protocols \fIprotocols\fP\|(5) protocol names
+/etc/hosts.equiv \fIrshd\fP\|(8) list of ``trusted'' hosts
+/etc/netstart \fIrc\fP\|(8) command script for initializing network
+/etc/rc \fIrc\fP\|(8) command script for starting standard servers
+/etc/rc.local \fIrc\fP\|(8) command script for starting local servers
+/etc/ftpusers \fIftpd\fP\|(8) list of ``unwelcome'' ftp users
+/etc/hosts.lpd \fIlpd\fP\|(8) list of hosts allowed to access printers
+/etc/inetd.conf \fIinetd\fP\|(8) list of servers started by \fIinetd\fP
+.TE
+The files distributed are set up for Internet hosts.
+Local networks and hosts should be added to describe the local
+configuration; the Berkeley entries may serve as examples
+(see also the section on
+.Pn /etc/hosts ).
+Network numbers will have to be chosen for each Ethernet.
+For sites connected to the Internet,
+the normal channels should be used for allocation of network
+numbers (contact hostmaster@SRI-NIC.ARPA).
+For other sites,
+these could be chosen more or less arbitrarily,
+but it is generally better to request official numbers
+to avoid conversion if a connection to the Internet (or others on the Internet)
+is ever established.
+.Sh 3 "Network servers"
+.PP
+Most network servers are automatically started up at boot time
+by the command file
+.Pn /etc/rc
+or by the Internet daemon (see below).
+These include the following:
+.TS
+lfC l l.
+Program Server Started by
+_
+/usr/sbin/syslogd error logging server \f(CW/etc/rc\fP
+/usr/sbin/named Internet name server \f(CW/etc/rc\fP
+/sbin/routed routing table management daemon \f(CW/etc/rc\fP
+/usr/sbin/rwhod system status daemon \f(CW/etc/rc\fP
+/usr/sbin/timed time synchronization daemon \f(CW/etc/rc\fP
+/usr/sbin/sendmail SMTP server \f(CW/etc/rc\fP
+/usr/libexec/rshd shell server inetd
+/usr/libexec/rexecd exec server inetd
+/usr/libexec/rlogind login server inetd
+/usr/libexec/telnetd TELNET server inetd
+/usr/libexec/ftpd FTP server inetd
+/usr/libexec/fingerd Finger server inetd
+/usr/libexec/tftpd TFTP server inetd
+.TE
+Consult the manual pages and accompanying documentation (particularly
+for named and sendmail) for details about their operation.
+.PP
+The use of
+.Xr routed
+and
+.Xr rwhod
+is controlled by shell
+variables set in
+.Pn /etc/netstart .
+By default,
+.Xr routed
+is used, but
+.Xr rwhod
+is not; they are enabled by setting the variables \fIroutedflags\fP and
+.Xr rwhod
+to strings other than ``NO.''
+The value of \fIroutedflags\fP provides host-specific options to
+.Xr routed .
+For example,
+.DS
+.ft CW
+routedflags=-q
+rwhod=NO
+.DE
+would run
+.Xr "routed -q"
+and would not run
+.Xr rwhod .
+.PP
+To have other network servers started as well,
+commands of the following sort should be placed in the site-dependent file
+.Pn /etc/rc.local .
+.DS
+.ft CW
+if [ -f /usr/sbin/timed ]; then
+ /usr/sbin/timed & echo -n ' timed' >/dev/console
+f\&i
+.DE
+.Sh 3 "Internet daemon"
+.PP
+In \*(4B most of the servers for user-visible services are started up by a
+``super server'', the Internet daemon. The Internet
+daemon,
+.Pn /usr/sbin/inetd ,
+acts as a master server for
+programs specified in its configuration file,
+.Pn /etc/inetd.conf ,
+listening for service requests for these servers, and starting
+up the appropriate program whenever a request is received.
+The configuration file contains lines containing a service
+name (as found in
+.Pn /etc/services ),
+the type of socket the
+server expects (e.g. stream or dgram), the protocol to be
+used with the socket (as found in
+.Pn /etc/protocols ),
+whether to wait for each server to complete before starting up another,
+the user name by which the server should run, the server
+program's name, and at most five arguments to pass to the
+server program.
+Some trivial services are implemented internally in
+.Xr inetd ,
+and their servers are listed as ``internal.''
+For example, an entry for the file
+transfer protocol server would appear as
+.DS
+.ft CW
+ftp stream tcp nowait root /usr/libexec/ftpd ftpd
+.DE
+Consult
+.Xr inetd (8)
+for more detail on the format of the configuration file
+and the operation of the Internet daemon.
+.Sh 3 "The \f(CW/etc/hosts.equiv\fP file"
+.PP
+The remote login and shell servers use an
+authentication scheme based on trusted hosts. The
+.Pn hosts.equiv
+file contains a list of hosts that are considered trusted
+and under a single administrative control. When a user
+contacts a remote login or shell server requesting service,
+the client process passes the user's name and the official
+name of the host on which the client is located. In the simple
+case, if the host's name is located in
+.Pn hosts.equiv
+and the user has an account on the server's machine, then service
+is rendered (i.e. the user is allowed to log in, or the command
+is executed). Users may expand this ``equivalence'' of
+machines by installing a
+.Pn \&.rhosts
+file in their login directory.
+The root login is handled specially, bypassing the
+.Pn hosts.equiv
+file, and using only the
+.Pn /.rhosts
+file.
+.PP
+Thus, to create a class of equivalent machines, the
+.Pn hosts.equiv
+file should contain the \fIofficial\fP names for those machines.
+If you are running the name server, you may omit the domain part
+of the host name for machines in your local domain.
+For example, three machines on our local
+network are considered trusted, so the
+.Pn hosts.equiv
+file is of the form:
+.DS
+.ft CW
+vangogh.CS.Berkeley.EDU
+picasso.CS.Berkeley.EDU
+okeeffe.CS.Berkeley.EDU
+.DE
+.Sh 3 "The \f(CW/etc/ftpusers\fP file"
+.PP
+The FTP server included in the system provides support for an
+anonymous FTP account. Because of the inherent security problems
+with such a facility you should read this section carefully if
+you consider providing such a service.
+.PP
+An anonymous account is enabled by creating a user
+.Xr ftp .
+When a client uses the anonymous account a
+.Xr chroot (2)
+system call is performed by the server to restrict the client
+from moving outside that part of the filesystem where the
+user ftp home directory is located. Because a
+.Xr chroot
+call is used, certain programs and files used by the server
+process must be placed in the ftp home directory.
+Further, one must be
+sure that all directories and executable images are unwritable.
+The following directory setup is recommended. The
+use of the
+.Xr awk
+commands to copy the
+.Pn /etc/passwd
+and
+.Pn /etc/group
+files is \fBSTRONGLY\fP recommended.
+.DS
+\fB#\fP \fIcd ~ftp\fP
+\fB#\fP \fIchmod 555 .; chown ftp .; chgrp ftp .\fP
+\fB#\fP \fImkdir bin etc pub\fP
+\fB#\fP \fIchown root bin etc\fP
+\fB#\fP \fIchmod 555 bin etc\fP
+\fB#\fP \fIchown ftp pub\fP
+\fB#\fP \fIchmod 777 pub\fP
+\fB#\fP \fIcd bin\fP
+\fB#\fP \fIcp /bin/sh /bin/ls .\fP
+\fB#\fP \fIchmod 111 sh ls\fP
+\fB#\fP \fIcd ../etc\fP
+\fB#\fP \fIawk -F: '{$2="*";print$1":"$2":"$3":"$4":"$5":"$6":"}' < /etc/passwd > passwd\fP
+\fB#\fP \fIawk -F: '{$2="*";print$1":"$2":"}' < /etc/group > group\fP
+\fB#\fP \fIchmod 444 passwd group\fP
+.DE
+When local users wish to place files in the anonymous
+area, they must be placed in a subdirectory. In the
+setup here, the directory
+.Pn ~ftp/pub
+is used.
+.PP
+Aside from the problems of directory modes and such,
+the ftp server may provide a loophole for interlopers
+if certain user accounts are allowed.
+The file
+.Pn /etc/ftpusers
+is checked on each connection.
+If the requested user name is located in the file, the
+request for service is denied. This file normally has
+the following names on our systems.
+.DS
+uucp
+root
+.DE
+Accounts without passwords need not be listed in this file as the ftp
+server will refuse service to these users.
+Accounts with nonstandard shells (any not listed in
+.Pn /etc/shells )
+will also be denied access via ftp.
diff --git a/share/doc/smm/01.setup/6.t b/share/doc/smm/01.setup/6.t
new file mode 100644
index 0000000..d043474
--- /dev/null
+++ b/share/doc/smm/01.setup/6.t
@@ -0,0 +1,663 @@
+.\" Copyright (c) 1980, 1986, 1988, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)6.t 8.1 (Berkeley) 7/27/93
+.\"
+.ds LH "Installing/Operating \*(4B
+.ds CF \*(Dy
+.Sh 1 "System operation"
+.PP
+This section describes procedures used to operate a \*(4B UNIX system.
+Procedures described here are used periodically, to reboot the system,
+analyze error messages from devices, do disk backups, monitor
+system performance, recompile system software and control local changes.
+.Sh 2 "Bootstrap and shutdown procedures"
+.PP
+In a normal reboot, the system checks the disks and comes up multi-user
+without intervention at the console.
+Such a reboot
+can be stopped (after it prints the date) with a ^C (interrupt).
+This will leave the system in single-user mode, with only the console
+terminal active.
+(If the console has been marked ``insecure'' in
+.Pn /etc/ttys
+you must enter the root password to bring the machine to single-user mode.)
+It is also possible to allow the filesystem checks to complete
+and then to return to single-user mode by signaling
+.Xr fsck (8)
+with a QUIT signal (^\|\e).
+.PP
+To bring the system up to a multi-user configuration from the single-user
+status,
+all you have to do is hit ^D on the console. The system
+will then execute
+.Pn /etc/rc ,
+a multi-user restart script (and
+.Pn /etc/rc.local ),
+and come up on the terminals listed as
+active in the file
+.Pn /etc/ttys .
+See
+.Xr init (8)
+and
+.Xr ttys (5)
+for more details.
+Note, however, that this does not cause a filesystem check to be done.
+Unless the system was taken down cleanly, you should run
+``fsck \-p'' or force a reboot with
+.Xr reboot (8)
+to have the disks checked.
+.PP
+To take the system down to a single user state you can use
+.DS
+\fB#\fP \fIkill 1\fP
+.DE
+or use the
+.Xr shutdown (8)
+command (which is much more polite, if there are other users logged in)
+when you are running multi-user.
+Either command will kill all processes and give you a shell on the console,
+as if you had just booted. Filesystems remain mounted after the
+system is taken single-user. If you wish to come up multi-user again, you
+should do this by:
+.DS
+\fB#\fP \fIcd /\fP
+\fB#\fP \fI/sbin/umount -a\fP
+\fB#\fP \fI^D\fP
+.DE
+.PP
+Each system shutdown, crash, processor halt and reboot
+is recorded in the system log
+with its cause.
+.Sh 2 "Device errors and diagnostics"
+.PP
+When serious errors occur on peripherals or in the system, the system
+prints a warning diagnostic on the console.
+These messages are collected
+by the system error logging process
+.Xr syslogd (8)
+and written into a system error log file
+.Pn /var/log/messages .
+Less serious errors are sent directly to
+.Xr syslogd ,
+which may log them on the console.
+The error priorities that are logged and the locations to which they are logged
+are controlled by
+.Pn /etc/syslog.conf .
+See
+.Xr syslogd (8)
+for further details.
+.PP
+Error messages printed by the devices in the system are described with the
+drivers for the devices in section 4 of the programmer's manual.
+If errors occur suggesting hardware problems, you should contact
+your hardware support group or field service. It is a good idea to
+examine the error log file regularly
+(e.g. with the command \fItail \-r /var/log/messages\fP).
+.Sh 2 "Filesystem checks, backups, and disaster recovery"
+.PP
+Periodically (say every week or so in the absence of any problems)
+and always (usually automatically) after a crash,
+all the filesystems should be checked for consistency
+by
+.Xr fsck (8).
+The procedures of
+.Xr reboot (8)
+should be used to get the system to a state where a filesystem
+check can be done manually or automatically.
+.PP
+Dumping of the filesystems should be done regularly,
+since once the system is going it is easy to
+become complacent.
+Complete and incremental dumps are easily done with
+.Xr dump (8).
+You should arrange to do a towers-of-hanoi dump sequence; we tune
+ours so that almost all files are dumped on two tapes and kept for at
+least a week in most every case. We take full dumps every month (and keep
+these indefinitely).
+Operators can execute ``dump w'' at login, which will tell them what needs
+to be dumped
+(based on the
+.Pn /etc/fstab
+information).
+Be sure to create a group
+.B operator
+in the file
+.Pn /etc/group
+so that dump can notify logged-in operators when it needs help.
+.PP
+More precisely, we have three sets of dump tapes: 10 daily tapes,
+5 weekly sets of 2 tapes, and fresh sets of three tapes monthly.
+We do daily dumps circularly on the daily tapes with sequence
+`3 2 5 4 7 6 9 8 9 9 9 ...'.
+Each weekly is a level 1 and the daily dump sequence level
+restarts after each weekly dump.
+Full dumps are level 0 and the daily sequence restarts after each full dump
+also.
+.PP
+Thus a typical dump sequence would be:
+.br
+.ne 6
+.TS
+center;
+c c c c c
+n n n l l.
+tape name level number date opr size
+_
+FULL 0 Nov 24, 1992 operator 137K
+D1 3 Nov 28, 1992 operator 29K
+D2 2 Nov 29, 1992 operator 34K
+D3 5 Nov 30, 1992 operator 19K
+D4 4 Dec 1, 1992 operator 22K
+W1 1 Dec 2, 1992 operator 40K
+D5 3 Dec 4, 1992 operator 15K
+D6 2 Dec 5, 1992 operator 25K
+D7 5 Dec 6, 1992 operator 15K
+D8 4 Dec 7, 1992 operator 19K
+W2 1 Dec 9, 1992 operator 118K
+D9 3 Dec 11, 1992 operator 15K
+D10 2 Dec 12, 1992 operator 26K
+D1 5 Dec 15, 1992 operator 14K
+W3 1 Dec 17, 1992 operator 71K
+D2 3 Dec 18, 1992 operator 13K
+FULL 0 Dec 22, 1992 operator 135K
+.TE
+We do weekly dumps often enough that daily dumps always fit on one tape.
+.PP
+Dumping of files by name is best done by
+.Xr tar (1)
+but the amount of data that can be moved in this way is limited
+to a single tape.
+Finally, if there are enough drives, entire
+disks can be copied with
+.Xr dd (1)
+using the raw special files and an appropriate
+blocking factor; the number of sectors per track is usually
+a good value to use (consult
+.Pn /etc/disktab ).
+.PP
+It is desirable that full dumps of the root filesystem be
+made regularly.
+This is especially true when only one disk is available.
+Then, if the
+root filesystem is damaged by a hardware or software failure, you
+can rebuild a workable disk by doing a restore in the
+same way that the initial root filesystem was created.
+.PP
+Exhaustion of user-file space is certain to occur
+now and then; disk quotas may be imposed, or if you
+prefer a less fascist approach, try using the programs
+.Xr du (1),
+.Xr df (1),
+and
+.Xr quot (8),
+combined with threatening
+messages of the day, and personal letters.
+.Sh 2 "Moving filesystem data"
+.PP
+If you have the resources,
+the best way to move a filesystem
+is to dump it to a spare disk partition, or magtape, using
+.Xr dump (8),
+use
+.Xr newfs (8)
+to create the new filesystem,
+and restore the filesystem using
+.Xr restore (8).
+Filesystems may also be moved by piping the output of
+.Xr dump
+to
+.Xr restore .
+The
+.Xr restore
+program uses an ``in-place'' algorithm that
+allows filesystem dumps to be restored without concern for the
+original size of the filesystem. Further, portions of a
+filesystem may be selectively restored using a method similar
+to the tape archive program.
+.PP
+If you have to merge a filesystem into another, existing one,
+the best bet is to use
+.Xr tar (1).
+If you must shrink a filesystem, the best bet is to dump
+the original and restore it onto the new filesystem.
+If you
+are playing with the root filesystem and only have one drive,
+the procedure is more complicated.
+If the only drive is a Winchester disk, this procedure may not be used
+without overwriting the existing root or another partition.
+What you do is the following:
+.IP 1.
+GET A SECOND PACK, OR USE ANOTHER DISK DRIVE!!!!
+.IP 2.
+Dump the root filesystem to tape using
+.Xr dump (8).
+.IP 3.
+Bring the system down.
+.IP 4.
+Mount the new pack in the correct disk drive, if
+using removable media.
+.IP 5.
+Load the distribution tape and install the new
+root filesystem as you did when first installing the system.
+Boot normally
+using the newly created disk filesystem.
+.PP
+Note that if you change the disk partition tables or add new disk
+drivers they should also be added to the standalone system in
+.Pn /sys/<architecture>/stand ,
+and the default disk partition tables in
+.Pn /etc/disktab
+should be modified.
+.Sh 2 "Monitoring system performance"
+.PP
+The
+.Xr systat
+program provided with the system is designed to be an aid to monitoring
+systemwide activity. The default ``pigs'' mode shows a dynamic ``ps''.
+By running in the ``vmstat'' mode
+when the system is active you can judge the system activity in several
+dimensions: job distribution, virtual memory load, paging and swapping
+activity, device interrupts, and disk and cpu utilization.
+Ideally, there should be few blocked (b) jobs,
+there should be little paging or swapping activity, there should
+be available bandwidth on the disk devices (most single arms peak
+out at 20-30 tps in practice), and the user cpu utilization (us) should
+be high (above 50%).
+.PP
+If the system is busy, then the count of active jobs may be large,
+and several of these jobs may often be blocked (b). If the virtual
+memory is active, then the paging demon will be running (sr will
+be non-zero). It is healthy for the paging demon to free pages when
+the virtual memory gets active; it is triggered by the amount of free
+memory dropping below a threshold and increases its pace as free memory
+goes to zero.
+.PP
+If you run in the ``vmstat'' mode
+when the system is busy, you can find
+imbalances by noting abnormal job distributions. If many
+processes are blocked (b), then the disk subsystem
+is overloaded or imbalanced. If you have several non-dma
+devices or open teletype lines that are ``ringing'', or user programs
+that are doing high-speed non-buffered input/output, then the system
+time may go high (60-70% or higher).
+It is often possible to pin down the cause of high system time by
+looking to see if there is excessive context switching (cs), interrupt
+activity (in) and per-device interrupt counts,
+or system call activity (sy). Cumulatively on one of
+our large machines we average about 60-200 context switches and interrupts
+per second and about 50-500 system calls per second.
+.PP
+If the system is heavily loaded, or if you have little memory
+for your load (2M is little in most any case), then the system
+may be forced to swap. This is likely to be accompanied by a noticeable
+reduction in system performance and pregnant pauses when interactive
+jobs such as editors swap out.
+If you expect to be in a memory-poor environment
+for an extended period you might consider administratively
+limiting system load.
+.Sh 2 "Recompiling and reinstalling system software"
+.PP
+It is easy to regenerate either the entire system or a single utility,
+and it is a good idea to try rebuilding pieces of the system to build
+confidence in the procedures.
+.LP
+In general, there are six well-known targets supported by
+all the makefiles on the system:
+.IP all 9
+This entry is the default target, the same as if no target is specified.
+This target builds the kernel, binary or library, as well as its
+associated manual pages.
+This target \fBdoes not\fP build the dependency files.
+Some of the utilities require that a \fImake depend\fP be done before
+a \fImake all\fP can succeed.
+.IP depend
+Build the include file dependency file, ``.depend'', which is
+read by
+.Xr make .
+See
+.Xr mkdep (1)
+for further details.
+.IP install
+Install the kernel, binary or library, as well as its associated
+manual pages.
+See
+.Xr install (1)
+for further details.
+.IP clean
+Remove the kernel, binary or library, as well as any object files
+created when building it.
+.IP cleandir
+The same as clean, except that the dependency files and formatted
+manual pages are removed as well.
+.IP obj
+Build a shadow directory structure in the area referenced by
+.Pn /usr/obj
+and create a symbolic link in the current source directory to
+reference it, named ``obj''.
+Once this shadow structure has been created, all the files created by
+.Xr make
+will live in the shadow structure, and
+.Pn /usr/src
+may be mounted read-only by multiple machines.
+Doing a \fImake obj\fP in
+.Pn /usr/src
+will build the shadow directory structure for everything on the
+system except for the contributed, old, and kernel software.
+.PP
+The system consists of three major parts:
+the kernel itself, found in
+.Pn /usr/src/sys ,
+the libraries, found in
+.Pn /usr/src/lib ,
+and the user programs (the rest of
+.Pn /usr/src ).
+.PP
+Deprecated software, found in
+.Pn /usr/src/old ,
+often has old style makefiles;
+some of it does not compile under \*(4B at all.
+.PP
+Contributed software, found in
+.Pn /usr/src/contrib ,
+usually does not support the ``cleandir'', ``depend'', or ``obj'' targets.
+.PP
+The kernel does not support the ``obj'' shadow structure.
+All kernels are compiled in subdirectories of
+.Pn /usr/src/sys/compile
+which is usually abbreviated as
+.Pn /sys/compile .
+If you want to mount your source tree read-only,
+.Pn /usr/src/sys/compile
+will have to be on a separate filesystem from
+.Pn /usr/src .
+Separation from
+.Pn /usr/src
+can be done by making
+.Pn /usr/src/sys/compile
+a symbolic link that references
+.Pn /usr/obj/sys/compile .
+If it is a symbolic link, the \fIS\fP variable in the kernel
+Makefile must be changed from
+.Pn \&../..
+to the absolute pathname needed to locate the kernel sources, usually
+.Pn /usr/src/sys .
+The symbolic link created by
+.Xr config (8)
+for
+.Pn machine
+must also be manually changed to an absolute pathname.
+Finally, the
+.Pn /usr/src/sys/libkern/obj
+directory must be located in
+.Pn /usr/obj/sys/libkern .
+.PP
+Each of the standard utilities and libraries may be built and
+installed by changing directories into the correct location and
+doing:
+.DS
+\fB#\fP \fImake\fP
+\fB#\fP \fImake install\fP
+.DE
+Note, if system include files have changed between compiles,
+.Xr make
+will not do the correct dependency checks if the dependency
+files have not been built using the ``depend'' target.
+.PP
+The entire library and utility suite for the system may be recompiled
+from scratch by changing directory to
+.Pn /usr/src
+and doing:
+.DS
+\fB#\fP \fImake build\fP
+.DE
+This target installs the system include files, cleans the source
+tree, builds and installs the libraries, and builds and installs
+the system utilities.
+.PP
+To recompile a specific program, first determine where the binary
+resides with the
+.Xr whereis (1)
+command, then change to the corresponding source directory and build
+it with the Makefile in the directory.
+For instance, to recompile ``passwd'',
+all one has to do is:
+.DS
+\fB#\fP \fIwhereis passwd\fP
+\fB/usr/bin/passwd\fP
+\fB#\fP \fIcd /usr/src/usr.bin/passwd\fP
+\fB#\fP \fImake\fP
+\fB#\fP \fImake install\fP
+.DE
+This will compile and install the
+.Xr passwd
+utility.
+.PP
+If you wish to recompile and install all programs into a particular
+target area you can override the default path prefix by doing:
+.DS
+\fB#\fP \fImake\fP
+\fB#\fP \fImake DESTDIR=\fPpathname \fIinstall\fP
+.DE
+Similarly, the mode, owner, group, and other characteristics of
+the installed object can be modified by changing other default
+make variables.
+See
+.Xr make (1),
+.Pn /usr/src/share/mk/bsd.README ,
+and the ``.mk'' scripts in the
+.Pn /usr/share/mk
+directory for more information.
+.PP
+If you modify the C library or system include files, to change a
+system call for example, and want to rebuild and install everything,
+you have to be a little careful.
+You must ensure that the include files are installed before anything
+is compiled, and that the libraries are installed before the remainder
+of the source, otherwise the loaded images will not contain the new
+routine from the library.
+If include files have been modified, the following commands should
+be done first:
+.DS
+\fB#\fP \fIcd /usr/src/include\fP
+\fB#\fP \fImake install\fP
+.DE
+Then, if, for example, C library files have been modified, the
+following commands should be executed:
+.DS
+\fB#\fP \fIcd /usr/src/lib/libc\fP
+\fB#\fP \fImake depend\fP
+\fB#\fP \fImake\fP
+\fB#\fP \fImake install\fP
+\fB#\fP \fIcd /usr/src\fP
+\fB#\fP \fImake depend\fP
+\fB#\fP \fImake\fP
+\fB#\fP \fImake install\fP
+.DE
+Alternatively, the \fImake build\fP command described above will
+accomplish the same tasks.
+This takes several hours on a reasonably configured machine.
+.Sh 2 "Making local modifications"
+.PP
+The source for locally written commands is normally stored in
+.Pn /usr/src/local ,
+and their binaries are kept in
+.Pn /usr/local/bin .
+This isolation of local binaries allows
+.Pn /usr/bin ,
+and
+.Pn /bin
+to correspond to the distribution tape (and to the manuals that
+people can buy).
+People using local commands should be made aware that they are not
+in the base manual.
+Manual pages for local commands should be installed in
+.Pn /usr/local/man/cat[1-8] .
+The
+.Xr man (1)
+command automatically finds manual pages placed in
+/usr/local/man/cat[1-8] to encourage this practice (see
+.Xr man.conf (5)).
+.Sh 2 "Accounting"
+.PP
+UNIX optionally records two kinds of accounting information:
+connect time accounting and process resource accounting. The connect
+time accounting information is stored in the file
+.Pn /var/log/wtmp ,
+which is summarized by the program
+.Xr ac (8).
+The process time accounting information is stored in the file
+.Pn /var/account/acct
+after it is enabled by
+.Xr accton (8),
+and is analyzed and summarized by the program
+.Xr sa (8).
+.PP
+If you need to recharge for computing time, you can develop
+procedures based on the information provided by these commands.
+A convenient way to do this is to give commands to the clock daemon
+.Pn /usr/sbin/cron
+to be executed every day at a specified time.
+This is done by adding lines to
+.Pn /etc/crontab.local ;
+see
+.Xr cron (8)
+for details.
+.Sh 2 "Resource control"
+.PP
+Resource control in the current version of UNIX is more
+elaborate than in most UNIX systems. The disk quota
+facilities developed at the University of Melbourne have
+been incorporated in the system and allow control over the
+number of files and amount of disk space each user and/or group may use
+on each filesystem. In addition, the resources consumed
+by any single process can be limited by the mechanisms of
+.Xr setrlimit (2).
+As distributed, the latter mechanism
+is voluntary, though sites may choose to modify the login
+mechanism to impose limits not covered with disk quotas.
+.PP
+To use the disk quota facilities, the system must be
+configured with ``options QUOTA''. Filesystems may then
+be placed under the quota mechanism by creating a null file
+.Pn quota.user
+and/or
+.Pn quota.group
+at the root of the filesystem, running
+.Xr quotacheck (8),
+and modifying
+.Pn /etc/fstab
+to show that the filesystem is to run
+with disk quotas (options userquota and/or groupquota).
+The
+.Xr quotaon (8)
+program may then be run to enable quotas.
+.PP
+Individual quotas are applied by using the quota editor
+.Xr edquota (8).
+Users may view their quotas (but not those of other users) with the
+.Xr quota (1)
+program. The
+.Xr repquota (8)
+program may be used to summarize the quotas and current
+space usage on a particular filesystem or filesystems.
+.PP
+Quotas are enforced with \fIsoft\fP and \fIhard\fP limits.
+When a user and/or group first reaches a soft limit on a resource, a
+message is generated on their terminal. If the user and/or group fails to
+lower the resource usage below the soft limit
+for longer than the time limit established for that filesystem
+(default seven days) the system then treats the soft limit as a
+\fIhard\fP limit and disallows any allocations until enough space is
+reclaimed to bring the user and/or group back below the soft limit.
+Hard limits are enforced strictly, resulting in errors when a user
+and/or group tries to create or write a file. Each time a hard limit is
+exceeded the system will generate a message on the user's terminal.
+.PP
+Consult the auxiliary document, ``Disc Quotas in a UNIX Environment'' (SMM:4)
+and the appropriate manual entries for more information.
+.Sh 2 "Network troubleshooting"
+.PP
+If you have anything more than a trivial network configuration,
+from time to time you are bound to run into problems. Before
+blaming the software, first check your network connections. On
+networks such as the Ethernet a
+loose cable tap or misplaced power cable can result in severely
+deteriorated service. The
+.Xr netstat (1)
+program may be of aid in tracking down hardware malfunctions.
+In particular, look at the \fB\-i\fP and \fB\-s\fP options in the manual page.
+.PP
+Should you believe a communication protocol problem exists,
+consult the protocol specifications and attempt to isolate the
+problem in a packet trace. The SO_DEBUG option may be supplied
+before establishing a connection on a socket, in which case the
+system will trace all traffic and internal actions (such as timers
+expiring) in a circular trace buffer.
+This buffer may then be printed out with the
+.Xr trpt (8)
+program.
+Most of the servers distributed with the system
+accept a \fB\-d\fP option forcing
+all sockets to be created with debugging turned on.
+Consult the appropriate manual pages for more information.
+.Sh 2 "Files that need periodic attention"
+.PP
+We conclude the discussion of system operations by listing
+the files that require periodic attention or are system specific:
+.TS
+center;
+lfC l.
+/etc/fstab how disk partitions are used
+/etc/disktab default disk partition sizes/labels
+/etc/printcap printer database
+/etc/gettytab terminal type definitions
+/etc/remote names and phone numbers of remote machines for \fItip\fP(1)
+/etc/group group memberships
+/etc/motd message of the day
+/etc/master.passwd password file; each account has a line
+/etc/rc.local local system restart script; runs reboot; starts daemons
+/etc/inetd.conf local internet servers
+/etc/hosts local host name database
+/etc/networks network name database
+/etc/services network services database
+/etc/hosts.equiv hosts under same administrative control
+/etc/syslog.conf error log configuration for \fIsyslogd\fP\|(8)
+/etc/ttys enables/disables ports
+/etc/crontab commands that are run periodically
+/etc/crontab.local local commands that are run periodically
+/etc/aliases mail forwarding and distribution groups
+/var/account/acct raw process account data
+/var/log/messages system error log
+/var/log/wtmp login session accounting
+.TE
+.pn 2
+.bp
+.PX
diff --git a/share/doc/smm/01.setup/Makefile b/share/doc/smm/01.setup/Makefile
new file mode 100644
index 0000000..ce68a5c
--- /dev/null
+++ b/share/doc/smm/01.setup/Makefile
@@ -0,0 +1,15 @@
+# @(#)Makefile 8.1 (Berkeley) 7/27/93
+
+DIR= smm/01.setup
+SRCS= 0.t 1.t 2.t 3.t 4.t 5.t 6.t
+FILES= ${SRCS}
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+install: ${SRCS}
+ install -c -o ${BINOWN} -g ${BINGRP} -m 444 \
+ Makefile ${FILES} ${EXTRA} ${DESTDIR}${BINDIR}/${DIR}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/smm/01.setup/spell.ok b/share/doc/smm/01.setup/spell.ok
new file mode 100644
index 0000000..c4f58a2
--- /dev/null
+++ b/share/doc/smm/01.setup/spell.ok
@@ -0,0 +1,618 @@
+A1096A
+AA
+ACU
+AMD
+Automounter
+BA
+BLOCKSIZE
+BSD
+Bb
+Bostic
+Bourne
+Bs
+Bz
+CCI
+CCITT
+CLNP
+CLTP
+COMPAT
+CPU's
+CS80
+CSRG
+CW
+Catseye
+Cyl
+DAT
+DECstation
+DESTDIR
+DISK's
+DISKTYPE
+DMA
+DN11
+DV
+DaVinci
+Dk
+Dn
+Dy
+EBCDIC
+EEPROM's
+EINTR
+EISA
+EOT
+ERESTART
+ESIS
+Emulex
+Exabyte
+FDDI
+FIPS
+FPU
+FTAM
+Filesystem
+Filesystems
+GCC
+GENERIC.hp300
+GX
+Gatorbox
+HDLC
+HIL
+HP
+HP's
+HP300
+HP300s
+HP433
+HP9000
+HPBSD
+HPUXCOMPAT
+Hibler
+IB
+ICMP
+IDP
+IDs
+IFLNK
+IP
+IPC
+IPSENDREDIRECTS
+IPX
+ISA
+ISO
+ISVTX
+Intel
+Jul
+Karels
+Kerberos
+L.aliases
+L.cmds
+L.sys
+LAN
+LAPB
+LFS
+LH
+LK201
+LOGFILE
+Leffler
+Luna
+MAKEDEV.local
+MB
+MC68040
+MFS
+MIB
+MIPS
+MISC
+MMU
+MT02
+Macklem
+Makefile
+Makefiles
+Maxtor
+McKusick
+NFS
+NIC.ARPA
+NPSECT
+NTP
+OHPUX
+OS
+OSI
+OSes
+Omron
+PCATCH
+PDT
+PMAD
+PMAG
+PMAX
+PMAZ
+POSIX
+POSIX.1
+POSIX.2
+PSD:5
+PVRX
+Pathnames
+Pendry
+Postel
+README
+RFC
+RH
+RISC
+ROM
+RS232
+RZ23
+RZ55
+RZ57
+SCSI
+SEQF
+SIOCGIFCONF
+SLC
+SMM:1
+SMM:10
+SMM:13
+SMM:14
+SMM:2
+SMM:3
+SMM:4
+SMM:6
+SMM:7
+SMM:8
+SMM:9
+SMTP
+SPARC
+SPARCstation
+SPP
+SRC
+SUNOS
+Sbus
+Solaris
+Standalone
+Std1003.1
+Std1003.2
+SunOS
+TAI
+TAPE's
+TBOOT
+TCP
+TIOCSCTTY
+TK50
+TM
+TP4
+TURBOchannel
+TVRX
+TZ
+Tcpmux
+Topcat
+Tue
+UCB
+UDP
+UFS
+ULTRIX
+ULTRIXCOMPAT
+UNIX''SMM:1
+USERFILE
+USL
+UTC
+UUAIDS
+UX
+Ux
+VAX
+VFS
+VME
+X11R5
+XX
+Xinu
+a,c,u,p
+a.out
+adaptor
+adaptors
+addrlen
+adiron
+adm
+aka
+aliases.db
+amd
+autochanger
+autoconf
+autoconfiguration
+autoconfigures
+bdes
+bootblock
+bootimage
+bootp
+bootpath
+bootrom
+bootsd
+bootstrapped
+bs
+bsd
+bsd.README
+btree
+bwtwo
+c.f
+callback
+cbosg
+cbosgd
+centronics
+cfb0
+cgsix
+cgthree
+changelist
+chflags
+chico
+chpass
+cksum
+cleandir
+cleanerd
+clnp
+cltp
+conf
+conformant
+contrib
+cpio
+crontab
+crontab.local
+cs
+csh.cshrc
+csh.login
+csh.logout
+cshrc
+ct
+cul0
+db
+dbopen
+dc7085
+deadfs
+dev
+dgram
+dialcode
+dialcodes
+dict
+disk3
+diskful
+disklabel
+disklabels
+disktab
+dm
+dm.conf
+dm.config
+dma
+doc
+endian
+es
+esis
+ext
+fd
+fdesc
+fifofs
+files.HOST
+fileservers
+filesystem
+filesystems
+foo
+frags
+friend.host.inet.number
+fsf
+fstab
+fstype
+ftpusers
+ftpwelcome
+fts
+funopen
+gcc
+genvmunix
+getcap
+gettytab
+gid
+gid's
+gids
+groff
+groupquota
+hangup
+hanoi
+heapsort
+hexdump
+hier
+host.inet.number
+hostmaster
+hosts.equiv
+hosts.lpd
+hp
+hp300
+hpib0
+inetd.conf
+inline
+inode
+int
+intr
+iob
+iso
+kbyte
+kbytes
+kdb
+kdump
+kerberos
+kerberosIV
+kernfs
+kmem
+krb.conf
+ksh
+ktrace
+kvm
+labelling
+lastlog
+ld.so.cache
+le
+le0
+lfs
+lib
+libc
+libdata
+libedit
+libexec
+libkern
+libkvm
+libutil
+localhost
+lofs
+logname
+loopback
+lq
+lun
+luna68k
+magtape
+mail.local
+mail.rc
+maillog
+maillog.0
+maillog.1
+maillog.2
+maillog.3
+maillog.4
+maillog.5
+maillog.6
+maillog.7
+makefiles
+man.conf
+man0
+manl
+master.passwd
+maxine
+mckusick
+mdec
+mediainit
+mem
+mergesort
+mfb0
+mfs
+misc
+miscfs
+mk
+mkdb
+mkdep
+mkfifo
+mmap
+mnt
+mono
+motd
+mountd
+mqueue
+msgbuf
+mtree
+my.domain
+myfriend
+myfriend.my.domain
+myname
+myname.my.domain
+mysitename
+namelen
+net.inet.ip.redirect
+netgroup
+netinet
+netiso
+netmask
+netns
+netstart
+netx25
+newdev
+newlfs
+news3400
+newvmunix
+nfs
+nfsd
+nfsiod
+nfsstat
+nfssvc
+nodump
+nowait
+npsect
+nr
+nrmt0
+nrst0
+nsect
+nullfs
+nvi
+obj
+ogin
+ok
+okeeffe.CS.Berkeley.EDU
+olddev
+oldroot
+opr
+osockaddr
+out0123456789
+out2010123456
+pageable
+pathname
+pathnames
+pcc
+picasso.CS.Berkeley.EDU
+pid
+pm0
+pmake
+pmap
+pmax
+posix
+printcap
+pt0
+pty
+pty.c
+pty0
+pty1
+pty2
+pty3
+ptyp
+pwd.db
+quota.group
+quota.user
+quotas.user
+radixsort
+rc.local
+rct
+rd
+rd0
+rdsk
+recno
+rew
+rf
+rhosts
+rmt0
+rmt12
+ro
+roff
+root.dump
+root.image
+routedflags
+rq
+rrd0d
+rrz?a
+rrz?c
+rsd0d
+rsd3a
+rsf
+rst0
+rw
+rxx0
+rz
+rz0
+rz1c
+rz?a
+sbin
+sc14
+sc7
+scsi0
+scsiformat
+sd
+sd0
+sd2b
+sd3
+sd3a
+sd660
+secretmail
+securelevel
+securettys
+sendmail.cf
+setsid
+setup.tblms
+shar
+shareable
+sizeof
+skel
+sockaddr
+sparc
+specfs
+spwd.db
+sr
+src
+srvtab
+st
+st.st
+standalone
+std
+std.9600
+stderr
+stdin
+stdout
+subr
+sunos
+sw
+sy
+sysctl
+sysctl.c
+syslog.0
+syslog.1
+syslog.2
+syslog.3
+syslog.4
+syslog.5
+syslog.6
+syslog.7
+syslog.conf
+tahoe
+tapehost
+tcp
+termios
+tmac
+tmp
+toor
+tps
+tput
+tsleep
+tty.c
+tty00
+ttya
+ttyb
+ttyd
+ttyp
+ttyp0
+ttytype
+types.h
+tz
+tz6
+ucb
+ufs
+uid
+uid's
+uids
+uipc
+umap
+umapfs
+un.h
+unaddr.sun
+uname
+unistd.h
+userid
+username
+userquota
+usr.bin
+usr.sbin
+utmp
+uucp.daemon
+uucppublic
+uw
+vangogh.CS.Berkeley.EDU
+var
+vax
+ventel
+vfs
+vm
+vmunix
+vmunix.net
+vmunix.tape
+vnode
+vnodes
+vol
+vt100
+wildcard
+wr
+wsrc
+wtmp
+xargs
+xbpf
+xcfb0
+xf
+xp
+xpbf
+xpf
+xsf
+xx
+xx0
+xxx
+your.host.inet.number
+yymmddhhmm
+zA
+zoneinfo
diff --git a/share/doc/smm/04.quotas/Makefile b/share/doc/smm/04.quotas/Makefile
new file mode 100644
index 0000000..f55290d
--- /dev/null
+++ b/share/doc/smm/04.quotas/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= smm/04.quotas
+SRCS= quotas.ms
+MACROS= -ms
+
+.include <bsd.doc.mk>
diff --git a/share/doc/smm/04.quotas/quotas.ms b/share/doc/smm/04.quotas/quotas.ms
new file mode 100644
index 0000000..3691830
--- /dev/null
+++ b/share/doc/smm/04.quotas/quotas.ms
@@ -0,0 +1,318 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)quotas.ms 8.1 (Berkeley) 6/8/93
+.\"
+.EH 'SMM:4-%''Disc Quotas in a \s-2UNIX\s+2 Environment'
+.OH 'Disc Quotas in a \s-2UNIX\s+2 Environment''SMM:4-%'
+.ND 5th July, 1983
+.TL
+Disc Quotas in a \s-2UNIX\s+2\s-3\u*\d\s0 Environment
+.FS
+* UNIX is a trademark of Bell Laboratories.
+.FE
+.AU
+Robert Elz
+.AI
+Department of Computer Science
+University of Melbourne,
+Parkville,
+Victoria,
+Australia.
+.AB
+.PP
+In most computing environments, disc space is not
+infinite.
+The disc quota system provides a mechanism
+to control usage of disc space, on an
+individual basis.
+.PP
+Quotas may be set for each individual user, on any or
+all filesystems.
+.PP
+The quota system will warn users when they
+exceed their allotted limit, but allow some
+extra space for current work.
+Repeatedly remaining over quota at logout
+will cause a fatal over quota condition eventually.
+.PP
+The quota system is an optional part of
+\s-2VMUNIX\s0 that may be included when the
+system is configured.
+.AE
+.NH 1
+Users' view of disc quotas
+.PP
+To most users, disc quotas will either be of no concern,
+or a fact of life that cannot be avoided.
+The
+\fIquota\fP\|(1)
+command will provide information on any disc quotas
+that may have been imposed upon a user.
+.PP
+There are two individual possible quotas that may be
+imposed; usually if one is, both will be.
+A limit can be set on the amount of space a user
+can occupy, and there may be a limit on the number
+of files (inodes) he can own.
+.PP
+.I Quota
+provides information on the quotas that have
+been set by the system administrators, in each
+of these areas, and current usage.
+.PP
+There are four numbers for each limit: the current
+usage, soft limit (quota), hard limit, and number
+of remaining login warnings.
+The soft limit is the number of 1K blocks (or files)
+that the user is expected to remain below.
+Each time the user's usage goes past this limit,
+he will be warned.
+The hard limit cannot be exceeded.
+If a user's usage reaches this number, further
+requests for space (or attempts to create a file)
+will fail with an EDQUOT error, and the first time
+this occurs, a message will be written to the user's
+terminal.
+Only one message will be output, until space occupied
+is reduced below the limit, and reaches it again,
+in order to avoid continual noise from those
+programs that ignore write errors.
+.PP
+Whenever a user logs in with a usage greater than
+his soft limit, he will be warned, and his login
+warning count decremented.
+When he logs in under quota, the counter is reset
+to its maximum value (which is a system configuration
+parameter, that is typically 3).
+If the warning count should ever reach zero (caused
+by three successive logins over quota), the
+particular limit that has been exceeded will be treated
+as if the hard limit has been reached, and no
+more resources will be allocated to the user.
+The \fBonly\fP way to reset this condition is
+to reduce usage below quota, then log in again.
+.NH 2
+Surviving when quota limit is reached
+.PP
+In most cases, the only way to recover from over
+quota conditions is to abort whatever activity was in progress
+on the filesystem that has reached its limit, remove
+sufficient files to bring the limit back below quota,
+and retry the failed program.
+.PP
+However, if you are in the editor and a write fails
+because of an over quota situation, that is not
+a suitable course of action, as it is most likely
+that initially attempting to write the file
+will have truncated its previous contents, so should
+the editor be aborted without correctly writing the
+file not only will the recent changes be lost, but
+possibly much, or even all, of the data
+that previously existed.
+.PP
+There are several possible safe exits for a user
+caught in this situation.
+He may use the editor \fB!\fP shell escape command to
+examine his file space, and remove surplus files.
+Alternatively, using \fIcsh\fP, he may suspend the
+editor, remove some files, then resume it.
+A third possibility is to write the file to
+some other filesystem (perhaps to a file on /tmp)
+where the user's quota has not been exceeded.
+Then after rectifying the quota situation,
+the file can be moved back to the filesystem
+it belongs on.
+.NH 1
+Administering the quota system
+.PP
+To set up and establish the disc quota system,
+there are several steps necessary to be performed
+by the system administrator.
+.PP
+First, the system must be configured to include
+the disc quota sub-system.
+This is done by including the line:
+.DS
+options QUOTA
+.DE
+in the system configuration file, then running
+\fIconfig\fP\|(8)
+followed by a system configuration\s-3\u*\d\s0.
+.FS
+* See also the document ``Building 4.2BSD UNIX Systems with Config''.
+.FE
+.PP
+Second, a decision as to what filesystems need to have
+quotas applied needs to be made.
+Usually, only filesystems that house users' home directories,
+or other user files, will need to be subjected to
+the quota system, though it may also prove useful to
+include \fB/usr\fP.
+If possible, \fB/tmp\fP should usually be free of quotas.
+.PP
+Having decided on which filesystems quotas need to be
+set upon, the administrator should then allocate the
+available space amongst the competing needs. How this
+should be done is (way) beyond the scope of this document.
+.PP
+Then, the
+\fIedquota\fP\|(8)
+command can be used to actually set the limits desired upon
+each user. Where a number of users are to be given the
+same quotas (a common occurrence) the \fB\-p\fP switch
+to edquota will allow this to be easily accomplished.
+.PP
+Once the quotas are set, ready to operate, the system
+must be informed to enforce quotas on the desired filesystems.
+This is accomplished with the
+\fIquotaon\fP\|(8)
+command.
+.I Quotaon
+will either enable quotas for a particular filesystem, or
+with the \fB\-a\fP switch, will enable quotas for each
+filesystem indicated in \fB/etc/fstab\fP as using quotas.
+See
+\fIfstab\fP\|(5)
+for details.
+Most sites using the quota system will include the
+line
+.DS C
+/etc/quotaon -a
+.DE
+in \fB/etc/rc.local\fP.
+.PP
+Should quotas need to be disabled, the
+\fIquotaoff\fP\|(8)
+command will do that; however, should the filesystem be
+about to be dismounted, the
+\fIumount\fP\|(8)
+command will disable quotas immediately before the
+filesystem is unmounted.
+This is actually an effect of the
+\fIumount\fP\|(2)
+system call, and it guarantees that the quota system
+will not be disabled if the umount would fail
+because the filesystem is not idle.
+.PP
+Periodically (certainly after each reboot, and when quotas
+are first enabled for a filesystem), the records retained
+in the quota file should be checked for consistency with
+the actual number of blocks and files allocated to
+the user.
+The
+\fIquotacheck\fP\|(8)
+command can be used to accomplish this.
+It is not necessary to dismount the filesystem, or disable
+the quota system to run this command, though on
+active filesystems inaccurate results may occur.
+This does no real harm in most cases; another run of
+.I quotacheck
+when the filesystem is idle will certainly correct any inaccuracy.
+.PP
+The super-user may use the
+\fIquota\fP\|(1)
+command to examine the usage and quotas of any user, and
+the
+\fIrepquota\fP\|(8)
+command may be used to check the usages and limits for
+all users on a filesystem.
+.NH 1
+Some implementation details
+.PP
+Disc quota usage and information is stored in a file on the
+filesystem that the quotas are to be applied to.
+Conventionally, this file is \fBquotas\fR in the root of
+the filesystem.
+While this name is not known to the system in any way,
+several of the user level utilities ``know'' it, and
+choosing any other name would not be wise.
+.PP
+The data in the file comprises an array of structures, indexed
+by uid, one structure for each user on the system (whether
+the user has a quota on this filesystem or not).
+If the uid space is sparse, then the file may have holes
+in it, which would be lost by copying, so it is best to
+avoid this.
+.PP
+The system is informed of the existence of the quota
+file by the
+\fIsetquota\fP\|(2)
+system call.
+It then reads the quota entries for each user currently
+active, then for any files open owned by users who
+are not currently active.
+Each subsequent open of a file on the filesystem will
+be accompanied by a pairing with its quota information.
+In most cases this information will be retained in core,
+either because the user who owns the file is running some
+process, because other files are open owned by the same
+user, or because some file (perhaps this one) was recently
+accessed.
+In memory, the quota information is kept hashed by user-id
+and filesystem, and retained in an LRU chain so recently
+released data can be easily reclaimed.
+Information about those users whose last process has
+recently terminated is also retained in this way.
+.PP
+Each time a block is accessed or released, and each time an inode
+is allocated or freed, the quota system gets told
+about it, and in the case of allocations, gets the
+opportunity to object.
+.PP
+Measurements have shown
+that the quota code uses a very small percentage of the system
+cpu time consumed in writing a new block to disc.
+.NH 1
+Acknowledgments
+.PP
+The current disc quota system is loosely based upon a very
+early scheme implemented at the University of New South
+Wales, and Sydney University in the mid 70's. That system
+implemented a single combined limit for both files and blocks
+on all filesystems.
+.PP
+A later system was implemented at the University of Melbourne
+by the author, but was not kept highly accurate; e.g.,
+chown's (etc) did not affect quotas, nor did i/o to a file
+other than one owned by the instigator.
+.PP
+The current system has been running (with only minor modifications)
+since January 82 at Melbourne.
+It is actually just a small part of a much broader resource
+control scheme, which is capable of controlling almost
+anything that is usually uncontrolled in unix. The rest
+of this is, as yet, still in a state where it is far too
+subject to change to be considered for distribution.
+.PP
+For the 4.2BSD release, much work has been done to clean
+up and sanely incorporate the quota code by Sam Leffler and
+Kirk McKusick at The University of California at Berkeley.
diff --git a/share/doc/smm/05.fastfs/0.t b/share/doc/smm/05.fastfs/0.t
new file mode 100644
index 0000000..9cc759b
--- /dev/null
+++ b/share/doc/smm/05.fastfs/0.t
@@ -0,0 +1,159 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 6/8/93
+.\"
+.EQ
+delim $$
+.EN
+.if n .ND
+.TL
+A Fast File System for UNIX*
+.EH 'SMM:05-%''A Fast File System for \s-2UNIX\s+2'
+.OH 'A Fast File System for \s-2UNIX\s+2''SMM:05-%'
+.AU
+Marshall Kirk McKusick, William N. Joy\(dg,
+Samuel J. Leffler\(dd, Robert S. Fabry
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, CA 94720
+.AB
+.FS
+* UNIX is a trademark of Bell Laboratories.
+.FE
+.FS
+\(dg William N. Joy is currently employed by:
+Sun Microsystems, Inc, 2550 Garcia Avenue, Mountain View, CA 94043
+.FE
+.FS
+\(dd Samuel J. Leffler is currently employed by:
+Lucasfilm Ltd., PO Box 2009, San Rafael, CA 94912
+.FE
+.FS
+This work was done under grants from
+the National Science Foundation under grant MCS80-05144,
+and the Defense Advanced Research Projects Agency (DoD) under
+ARPA Order No. 4031 monitored by Naval Electronic System Command under
+Contract No. N00039-82-C-0235.
+.FE
+A reimplementation of the UNIX file system is described.
+The reimplementation provides substantially higher throughput
+rates by using more flexible allocation policies
+that allow better locality of reference and can
+be adapted to a wide range of peripheral and processor characteristics.
+The new file system clusters data that is sequentially accessed
+and provides two block sizes to allow fast access to large files
+while not wasting large amounts of space for small files.
+File access rates of up to ten times faster than the traditional
+UNIX file system are experienced.
+Long needed enhancements to the programmers'
+interface are discussed.
+These include a mechanism to place advisory locks on files,
+extensions of the name space across file systems,
+the ability to use long file names,
+and provisions for administrative control of resource usage.
+.sp
+.LP
+Revised February 18, 1984
+.AE
+.LP
+.sp 2
+CR Categories and Subject Descriptors:
+D.4.3
+.B "[Operating Systems]":
+File Systems Management \-
+.I "file organization, directory structures, access methods";
+D.4.2
+.B "[Operating Systems]":
+Storage Management \-
+.I "allocation/deallocation strategies, secondary storage devices";
+D.4.8
+.B "[Operating Systems]":
+Performance \-
+.I "measurements, operational analysis";
+H.3.2
+.B "[Information Systems]":
+Information Storage \-
+.I "file organization"
+.sp
+Additional Keywords and Phrases:
+UNIX,
+file system organization,
+file system performance,
+file system design,
+application program interface.
+.sp
+General Terms:
+file system,
+measurement,
+performance.
+.bp
+.ce
+.B "TABLE OF CONTENTS"
+.LP
+.sp 1
+.nf
+.B "1. Introduction"
+.LP
+.sp .5v
+.nf
+.B "2. Old file system
+.LP
+.sp .5v
+.nf
+.B "3. New file system organization
+3.1. Optimizing storage utilization
+3.2. File system parameterization
+3.3. Layout policies
+.LP
+.sp .5v
+.nf
+.B "4. Performance
+.LP
+.sp .5v
+.nf
+.B "5. File system functional enhancements
+5.1. Long file names
+5.2. File locking
+5.3. Symbolic links
+5.4. Rename
+5.5. Quotas
+.LP
+.sp .5v
+.nf
+.B Acknowledgements
+.LP
+.sp .5v
+.nf
+.B References
diff --git a/share/doc/smm/05.fastfs/1.t b/share/doc/smm/05.fastfs/1.t
new file mode 100644
index 0000000..dbdafc4
--- /dev/null
+++ b/share/doc/smm/05.fastfs/1.t
@@ -0,0 +1,112 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH Introduction
+.NH
+Introduction
+.PP
+This paper describes the changes from the original 512 byte UNIX file
+system to the new one released with the 4.2 Berkeley Software Distribution.
+It presents the motivations for the changes,
+the methods used to effect these changes,
+the rationale behind the design decisions,
+and a description of the new implementation.
+This discussion is followed by a summary of
+the results that have been obtained,
+directions for future work,
+and the additions and changes
+that have been made to the facilities that are
+available to programmers.
+.PP
+The original UNIX system that runs on the PDP-11\(dg
+.FS
+\(dg DEC, PDP, VAX, MASSBUS, and UNIBUS are
+trademarks of Digital Equipment Corporation.
+.FE
+has simple and elegant file system facilities. File system input/output
+is buffered by the kernel;
+there are no alignment constraints on
+data transfers and all operations are made to appear synchronous.
+All transfers to the disk are in 512 byte blocks, which can be placed
+arbitrarily within the data area of the file system. Virtually
+no constraints other than available disk space are placed on file growth
+[Ritchie74], [Thompson78].*
+.FS
+* In practice, a file's size is constrained to be less than about
+one gigabyte.
+.FE
+.PP
+When used on the VAX-11 together with other UNIX enhancements,
+the original 512 byte UNIX file
+system is incapable of providing the data throughput rates
+that many applications require.
+For example,
+applications
+such as VLSI design and image processing
+do a small amount of processing
+on large quantities of data and
+need to have a high throughput from the file system.
+High throughput rates are also needed by programs
+that map files from the file system into large virtual
+address spaces.
+Paging data in and out of the file system is likely
+to occur frequently [Ferrin82b].
+This requires a file system providing
+higher bandwidth than the original 512 byte UNIX
+one that provides only about
+two percent of the maximum disk bandwidth or about
+20 kilobytes per second per arm [White80], [Smith81b].
+.PP
+Modifications have been made to the UNIX file system to improve
+its performance.
+Since the UNIX file system interface
+is well understood and not inherently slow,
+this development retained the abstraction and simply changed
+the underlying implementation to increase its throughput.
+Consequently, users of the system have not been faced with
+massive software conversion.
+.PP
+Problems with file system performance have been dealt with
+extensively in the literature; see [Smith81a] for a survey.
+Previous work to improve the UNIX file system performance has been
+done by [Ferrin82a].
+The UNIX operating system drew many of its ideas from Multics,
+a large, high performance operating system [Feiertag71].
+Other work includes Hydra [Almes78],
+Spice [Thompson80],
+and a file system for a LISP environment [Symbolics81].
+A good introduction to the physical latencies of disks is
+given in [Pechura83].
+.ds RH Old file system
+.sp 2
+.ne 1i
diff --git a/share/doc/smm/05.fastfs/2.t b/share/doc/smm/05.fastfs/2.t
new file mode 100644
index 0000000..33d9ade
--- /dev/null
+++ b/share/doc/smm/05.fastfs/2.t
@@ -0,0 +1,143 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH Old file system
+.NH
+Old File System
+.PP
+In the file system developed at Bell Laboratories
+(the ``traditional'' file system),
+each disk drive is divided into one or more
+partitions. Each of these disk partitions may contain
+one file system. A file system never spans multiple
+partitions.\(dg
+.FS
+\(dg By ``partition'' here we refer to the subdivision of
+physical space on a disk drive. In the traditional file
+system, as in the new file system, file systems are really
+located in logical disk partitions that may overlap. This
+overlapping is made available, for example,
+to allow programs to copy entire disk drives containing multiple
+file systems.
+.FE
+A file system is described by its super-block,
+which contains the basic parameters of the file system.
+These include the number of data blocks in the file system,
+a count of the maximum number of files,
+and a pointer to the \fIfree list\fP, a linked
+list of all the free blocks in the file system.
+.PP
+Within the file system are files.
+Certain files are distinguished as directories and contain
+pointers to files that may themselves be directories.
+Every file has a descriptor associated with it called an
+.I "inode".
+An inode contains information describing ownership of the file,
+time stamps marking last modification and access times for the file,
+and an array of indices that point to the data blocks for the file.
+For the purposes of this section, we assume that the first 8 blocks
+of the file are directly referenced by values stored
+in an inode itself*.
+.FS
+* The actual number may vary from system to system, but is usually in
+the range 5-13.
+.FE
+An inode may also contain references to indirect blocks
+containing further data block indices.
+In a file system with a 512 byte block size, a singly indirect
+block contains 128 further block addresses,
+a doubly indirect block contains 128 addresses of further singly indirect
+blocks,
+and a triply indirect block contains 128 addresses of further doubly indirect
+blocks.
+.PP
+A 150 megabyte traditional UNIX file system consists
+of 4 megabytes of inodes followed by 146 megabytes of data.
+This organization segregates the inode information from the data;
+thus accessing a file normally incurs a long seek from the
+file's inode to its data.
+Files in a single directory are not typically allocated
+consecutive slots in the 4 megabytes of inodes,
+causing many non-consecutive blocks of inodes
+to be accessed when executing
+operations on the inodes of several files in a directory.
+.PP
+The allocation of data blocks to files is also suboptimum.
+The traditional
+file system never transfers more than 512 bytes per disk transaction
+and often finds that the next sequential data block is not on the same
+cylinder, forcing seeks between 512 byte transfers.
+The combination of the small block size,
+limited read-ahead in the system,
+and many seeks severely limits file system throughput.
+.PP
+The first work at Berkeley on the UNIX file system attempted to improve both
+reliability and throughput.
+The reliability was improved by staging modifications
+to critical file system information so that they could
+either be completed or repaired cleanly by a program
+after a crash [Kowalski78].
+The file system performance was improved by a factor of more than two by
+changing the basic block size from 512 to 1024 bytes.
+The increase was because of two factors:
+each disk transfer accessed twice as much data,
+and most files could be described without need to access
+indirect blocks since the direct blocks contained twice as much data.
+The file system with these changes will henceforth be referred to as the
+.I "old file system."
+.PP
+This performance improvement gave a strong indication that
+increasing the block size was a good method for improving
+throughput.
+Although the throughput had doubled,
+the old file system was still using only about
+four percent of the disk bandwidth.
+The main problem was that although the free list was initially
+ordered for optimal access,
+it quickly became scrambled as files were created and removed.
+Eventually the free list became entirely random,
+causing files to have their blocks allocated randomly over the disk.
+This forced a seek before every block access.
+Although old file systems provided transfer rates of up
+to 175 kilobytes per second when they were first created,
+this rate deteriorated to 30 kilobytes per second after a
+few weeks of moderate use because of this
+randomization of data block placement.
+There was no way of restoring the performance of an old file system
+except to dump, rebuild, and restore the file system.
+Another possibility, as suggested by [Maruyama76],
+would be to have a process that periodically
+reorganized the data on the disk to restore locality.
+.ds RH New file system
+.sp 2
+.ne 1i
diff --git a/share/doc/smm/05.fastfs/3.t b/share/doc/smm/05.fastfs/3.t
new file mode 100644
index 0000000..0ab1196
--- /dev/null
+++ b/share/doc/smm/05.fastfs/3.t
@@ -0,0 +1,594 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH New file system
+.NH
+New file system organization
+.PP
+In the new file system organization (as in the
+old file system organization),
+each disk drive contains one or more file systems.
+A file system is described by its super-block,
+located at the beginning of the file system's disk partition.
+Because the super-block contains critical data,
+it is replicated to protect against catastrophic loss.
+This is done when the file system is created;
+since the super-block data does not change,
+the copies need not be referenced unless a head crash
+or other hard disk error causes the default super-block
+to be unusable.
+.PP
+To ensure that it is possible to create files as large as
+$2 sup 32$ bytes with only two levels of indirection,
+the minimum size of a file system block is 4096 bytes.
+The size of file system blocks can be any power of two
+greater than or equal to 4096.
+The block size of a file system is recorded in the
+file system's super-block
+so it is possible for file systems with different block sizes
+to be simultaneously accessible on the same system.
+The block size must be decided at the time that
+the file system is created;
+it cannot be subsequently changed without rebuilding the file system.
+.PP
+The new file system organization divides a disk partition
+into one or more areas called
+.I "cylinder groups".
+A cylinder group is comprised of one or more consecutive
+cylinders on a disk.
+Associated with each cylinder group is some bookkeeping information
+that includes a redundant copy of the super-block,
+space for inodes,
+a bit map describing available blocks in the cylinder group,
+and summary information describing the usage of data blocks
+within the cylinder group.
+The bit map of available blocks in the cylinder group replaces
+the traditional file system's free list.
+For each cylinder group a static number of inodes
+is allocated at file system creation time.
+The default policy is to allocate one inode for each 2048
+bytes of space in the cylinder group, expecting this
+to be far more than will ever be needed.
+.PP
+All the cylinder group bookkeeping information could be
+placed at the beginning of each cylinder group.
+However, if this approach were used,
+all the redundant information would be on the top platter.
+A single hardware failure that destroyed the top platter
+could cause the loss of all redundant copies of the super-block.
+Thus the cylinder group bookkeeping information
+begins at a varying offset from the beginning of the cylinder group.
+The offset for each successive cylinder group is calculated to be
+about one track further from the beginning of the cylinder group
+than the preceding cylinder group.
+In this way the redundant
+information spirals down into the pack so that any single track, cylinder,
+or platter can be lost without losing all copies of the super-block.
+Except for the first cylinder group,
+the space between the beginning of the cylinder group
+and the beginning of the cylinder group information
+is used for data blocks.\(dg
+.FS
+\(dg While it appears that the first cylinder group could be laid
+out with its super-block at the ``known'' location,
+this would not work for file systems
+with block sizes of 16 kilobytes or greater.
+This is because of a requirement that the first 8 kilobytes of the disk
+be reserved for a bootstrap program and a separate requirement that
+the cylinder group information begin on a file system block boundary.
+To start the cylinder group on a file system block boundary,
+file systems with block sizes larger than 8 kilobytes
+would have to leave an empty space between the end of
+the boot block and the beginning of the cylinder group.
+Without knowing the size of the file system blocks,
+the system would not know what roundup function to use
+to find the beginning of the first cylinder group.
+.FE
+.NH 2
+Optimizing storage utilization
+.PP
+Data is laid out so that larger blocks can be transferred
+in a single disk transaction, greatly increasing file system throughput.
+As an example, consider a file in the new file system
+composed of 4096 byte data blocks.
+In the old file system this file would be composed of 1024 byte blocks.
+By increasing the block size, disk accesses in the new file
+system may transfer up to four times as much information per
+disk transaction.
+In large files, several
+4096 byte blocks may be allocated from the same cylinder so that
+even larger data transfers are possible before requiring a seek.
+.PP
+The main problem with
+larger blocks is that most UNIX
+file systems are composed of many small files.
+A uniformly large block size wastes space.
+Table 1 shows the effect of file system
+block size on the amount of wasted space in the file system.
+The files measured to obtain these figures reside on
+one of our time sharing
+systems that has roughly 1.2 gigabytes of on-line storage.
+The measurements are based on the active user file systems containing
+about 920 megabytes of formatted space.
+.KF
+.DS B
+.TS
+box;
+l|l|l
+a|n|l.
+Space used % waste Organization
+_
+775.2 Mb 0.0 Data only, no separation between files
+807.8 Mb 4.2 Data only, each file starts on 512 byte boundary
+828.7 Mb 6.9 Data + inodes, 512 byte block UNIX file system
+866.5 Mb 11.8 Data + inodes, 1024 byte block UNIX file system
+948.5 Mb 22.4 Data + inodes, 2048 byte block UNIX file system
+1128.3 Mb 45.6 Data + inodes, 4096 byte block UNIX file system
+.TE
+Table 1 \- Amount of wasted space as a function of block size.
+.DE
+.KE
+The space wasted is calculated to be the percentage of space
+on the disk not containing user data.
+As the block size on the disk
+increases, the waste rises quickly, to an intolerable
+45.6% waste with 4096 byte file system blocks.
+.PP
+To be able to use large blocks without undue waste,
+small files must be stored in a more efficient way.
+The new file system accomplishes this goal by allowing the division
+of a single file system block into one or more
+.I "fragments".
+The file system fragment size is specified
+at the time that the file system is created;
+each file system block can optionally be broken into
+2, 4, or 8 fragments, each of which is addressable.
+The lower bound on the size of these fragments is constrained
+by the disk sector size,
+typically 512 bytes.
+The block map associated with each cylinder group
+records the space available in a cylinder group
+at the fragment level;
+to determine if a block is available, aligned fragments are examined.
+Figure 1 shows a piece of a map from a 4096/1024 file system.
+.KF
+.DS B
+.TS
+box;
+l|c c c c.
+Bits in map XXXX XXOO OOXX OOOO
+Fragment numbers 0-3 4-7 8-11 12-15
+Block numbers 0 1 2 3
+.TE
+Figure 1 \- Example layout of blocks and fragments in a 4096/1024 file system.
+.DE
+.KE
+Each bit in the map records the status of a fragment;
+an ``X'' shows that the fragment is in use,
+while an ``O'' shows that the fragment is available for allocation.
+In this example,
+fragments 0\-5, 10, and 11 are in use,
+while fragments 6\-9, and 12\-15 are free.
+Fragments of adjoining blocks cannot be used as a full block,
+even if they are large enough.
+In this example,
+fragments 6\-9 cannot be allocated as a full block;
+only fragments 12\-15 can be coalesced into a full block.
+.PP
+On a file system with a block size of 4096 bytes
+and a fragment size of 1024 bytes,
+a file is represented by zero or more 4096 byte blocks of data,
+and possibly a single fragmented block.
+If a file system block must be fragmented to obtain
+space for a small amount of data,
+the remaining fragments of the block are made
+available for allocation to other files.
+As an example consider an 11000 byte file stored on
+a 4096/1024 byte file system.
+This file would use two full size blocks and one
+three fragment portion of another block.
+If no block with three aligned fragments is
+available at the time the file is created,
+a full size block is split yielding the necessary
+fragments and a single unused fragment.
+This remaining fragment can be allocated to another file as needed.
+.PP
+Space is allocated to a file when a program does a \fIwrite\fP
+system call.
+Each time data is written to a file, the system checks to see if
+the size of the file has increased*.
+.FS
+* A program may be overwriting data in the middle of an existing file
+in which case space would already have been allocated.
+.FE
+If the file needs to be expanded to hold the new data,
+one of three conditions exists:
+.IP 1)
+There is enough space left in an already allocated
+block or fragment to hold the new data.
+The new data is written into the available space.
+.IP 2)
+The file contains no fragmented blocks (and the last
+block in the file
+contains insufficient space to hold the new data).
+If space exists in a block already allocated,
+the space is filled with new data.
+If the remainder of the new data contains more than
+a full block of data, a full block is allocated and
+the first full block of new data is written there.
+This process is repeated until less than a full block
+of new data remains.
+If the remaining new data to be written will
+fit in less than a full block,
+a block with the necessary fragments is located,
+otherwise a full block is located.
+The remaining new data is written into the located space.
+.IP 3)
+The file contains one or more fragments (and the
+fragments contain insufficient space to hold the new data).
+If the size of the new data plus the size of the data
+already in the fragments exceeds the size of a full block,
+a new block is allocated.
+The contents of the fragments are copied
+to the beginning of the block
+and the remainder of the block is filled with new data.
+The process then continues as in (2) above.
+Otherwise, if the new data to be written will
+fit in less than a full block,
+a block with the necessary fragments is located,
+otherwise a full block is located.
+The contents of the existing fragments
+appended with the new data
+are written into the allocated space.
+.PP
+The problem with expanding a file one fragment at a
+time is that data may be copied many times as a
+fragmented block expands to a full block.
+Fragment reallocation can be minimized
+if the user program writes a full block at a time,
+except for a partial block at the end of the file.
+Since file systems with different block sizes may reside on
+the same system,
+the file system interface has been extended to provide
+application programs the optimal size for a read or write.
+For files the optimal size is the block size of the file system
+on which the file is being accessed.
+For other objects, such as pipes and sockets,
+the optimal size is the underlying buffer size.
+This feature is used by the Standard
+Input/Output Library,
+a package used by most user programs.
+This feature is also used by
+certain system utilities such as archivers and loaders
+that do their own input and output management
+and need the highest possible file system bandwidth.
+.PP
+The amount of wasted space in the 4096/1024 byte new file system
+organization is empirically observed to be about the same as in the
+1024 byte old file system organization.
+A file system with 4096 byte blocks and 512 byte fragments
+has about the same amount of wasted space as the 512 byte
+block UNIX file system.
+The new file system uses less space
+than the 512 byte or 1024 byte
+file systems for indexing information for
+large files and the same amount of space
+for small files.
+These savings are offset by the need to use
+more space for keeping track of available free blocks.
+The net result is about the same disk utilization
+when a new file system's fragment size
+equals an old file system's block size.
+.PP
+In order for the layout policies to be effective,
+a file system cannot be kept completely full.
+For each file system there is a parameter, termed
+the free space reserve, that
+gives the minimum acceptable percentage of file system
+blocks that should be free.
+If the number of free blocks drops below this level,
+only the system administrator can continue to allocate blocks.
+The value of this parameter may be changed at any time,
+even when the file system is mounted and active.
+The transfer rates that appear in section 4 were measured on file
+systems kept less than 90% full (a reserve of 10%).
+If the number of free blocks falls to zero,
+the file system throughput tends to be cut in half,
+because of the inability of the file system to localize
+blocks in a file.
+If a file system's performance degrades because
+of overfilling, it may be restored by removing
+files until the amount of free space once again
+reaches the minimum acceptable level.
+Access rates for files created during periods of little
+free space may be restored by moving their data once enough
+space is available.
+The free space reserve must be added to the
+percentage of waste when comparing the organizations given
+in Table 1.
+Thus, the percentage of waste in
+an old 1024 byte UNIX file system is roughly
+comparable to a new 4096/512 byte file system
+with the free space reserve set at 5%.
+(Compare 11.8% wasted with the old file system
+to 6.9% waste + 5% reserved space in the
+new file system.)
+.NH 2
+File system parameterization
+.PP
+Except for the initial creation of the free list,
+the old file system ignores the parameters of the underlying hardware.
+It has no information about either the physical characteristics
+of the mass storage device,
+or the hardware that interacts with it.
+A goal of the new file system is to parameterize the
+processor capabilities and
+mass storage characteristics
+so that blocks can be allocated in an
+optimum configuration-dependent way.
+Parameters used include the speed of the processor,
+the hardware support for mass storage transfers,
+and the characteristics of the mass storage devices.
+Disk technology is constantly improving and
+a given installation can have several different disk technologies
+running on a single processor.
+Each file system is parameterized so that it can be
+adapted to the characteristics of the disk on which
+it is placed.
+.PP
+For mass storage devices such as disks,
+the new file system tries to allocate new blocks
+on the same cylinder as the previous block in the same file.
+Optimally, these new blocks will also be
+rotationally well positioned.
+The distance between ``rotationally optimal'' blocks varies greatly;
+it can be a consecutive block
+or a rotationally delayed block
+depending on system characteristics.
+On a processor with an input/output channel that does not require
+any processor intervention between mass storage transfer requests,
+two consecutive disk blocks can often be accessed
+without suffering lost time because of an intervening disk revolution.
+For processors without input/output channels,
+the main processor must field an interrupt and
+prepare for a new disk transfer.
+The expected time to service this interrupt and
+schedule a new disk transfer depends on the
+speed of the main processor.
+.PP
+The physical characteristics of each disk include
+the number of blocks per track and the rate at which
+the disk spins.
+The allocation routines use this information to calculate
+the number of milliseconds required to skip over a block.
+The characteristics of the processor include
+the expected time to service an interrupt and schedule a
+new disk transfer.
+Given a block allocated to a file,
+the allocation routines calculate the number of blocks to
+skip over so that the next block in the file will
+come into position under the disk head in the expected
+amount of time that it takes to start a new
+disk transfer operation.
+For programs that sequentially access large amounts of data,
+this strategy minimizes the amount of time spent waiting for
+the disk to position itself.
+.PP
+To ease the calculation of finding rotationally optimal blocks,
+the cylinder group summary information includes
+a count of the available blocks in a cylinder
+group at different rotational positions.
+Eight rotational positions are distinguished,
+so the resolution of the
+summary information is 2 milliseconds for a typical 3600
+revolution per minute drive.
+The super-block contains a vector of lists called
+.I "rotational layout tables".
+The vector is indexed by rotational position.
+Each component of the vector
+lists the index into the block map for every data block contained
+in its rotational position.
+When looking for an allocatable block,
+the system first looks through the summary counts for a rotational
+position with a non-zero block count.
+It then uses the index of the rotational position to find the appropriate
+list to use to index through
+only the relevant parts of the block map to find a free block.
+.PP
+The parameter that defines the
+minimum number of milliseconds between the completion of a data
+transfer and the initiation of
+another data transfer on the same cylinder
+can be changed at any time,
+even when the file system is mounted and active.
+If a file system is parameterized to lay out blocks with
+a rotational separation of 2 milliseconds,
+and the disk pack is then moved to a system that has a
+processor requiring 4 milliseconds to schedule a disk operation,
+the throughput will drop precipitously because of lost disk revolutions
+on nearly every block.
+If the eventual target machine is known,
+the file system can be parameterized for it
+even though it is initially created on a different processor.
+Even if the move is not known in advance,
+the rotational layout delay can be reconfigured after the disk is moved
+so that all further allocation is done based on the
+characteristics of the new host.
+.NH 2
+Layout policies
+.PP
+The file system layout policies are divided into two distinct parts.
+At the top level are global policies that use file system
+wide summary information to make decisions regarding
+the placement of new inodes and data blocks.
+These routines are responsible for deciding the
+placement of new directories and files.
+They also calculate rotationally optimal block layouts,
+and decide when to force a long seek to a new cylinder group
+because there are insufficient blocks left
+in the current cylinder group to do reasonable layouts.
+Below the global policy routines are
+the local allocation routines that use a locally optimal scheme to
+lay out data blocks.
+.PP
+Two methods for improving file system performance are to increase
+the locality of reference to minimize seek latency
+as described by [Trivedi80], and
+to improve the layout of data to make larger transfers possible
+as described by [Nevalainen77].
+The global layout policies try to improve performance
+by clustering related information.
+They cannot attempt to localize all data references,
+but must also try to spread unrelated data
+among different cylinder groups.
+If too much localization is attempted,
+the local cylinder group may run out of space
+forcing the data to be scattered to non-local cylinder groups.
+Taken to an extreme,
+total localization can result in a single huge cluster of data
+resembling the old file system.
+The global policies try to balance the two conflicting
+goals of localizing data that is concurrently accessed
+while spreading out unrelated data.
+.PP
+One allocatable resource is inodes.
+Inodes are used to describe both files and directories.
+Inodes of files in the same directory are frequently accessed together.
+For example, the ``list directory'' command often accesses
+the inode for each file in a directory.
+The layout policy tries to place all the inodes of
+files in a directory in the same cylinder group.
+To ensure that files are distributed throughout the disk,
+a different policy is used for directory allocation.
+A new directory is placed in a cylinder group that has a greater
+than average number of free inodes,
+and the smallest number of directories already in it.
+The intent of this policy is to allow the inode clustering policy
+to succeed most of the time.
+The allocation of inodes within a cylinder group is done using a
+next free strategy.
+Although this allocates the inodes randomly within a cylinder group,
+all the inodes for a particular cylinder group can be read with
+8 to 16 disk transfers.
+(At most 16 disk transfers are required because a cylinder
+group may have no more than 2048 inodes.)
+This puts a small and constant upper bound on the number of
+disk transfers required to access the inodes
+for all the files in a directory.
+In contrast, the old file system typically requires
+one disk transfer to fetch the inode for each file in a directory.
+.PP
+The other major resource is data blocks.
+Since data blocks for a file are typically accessed together,
+the policy routines try to place all data
+blocks for a file in the same cylinder group,
+preferably at rotationally optimal positions in the same cylinder.
+The problem with allocating all the data blocks
+in the same cylinder group is that large files will
+quickly use up available space in the cylinder group,
+forcing a spill over to other areas.
+Further, using all the space in a cylinder group
+causes future allocations for any file in the cylinder group
+to also spill to other areas.
+Ideally none of the cylinder groups should ever become completely full.
+The heuristic solution chosen is to
+redirect block allocation
+to a different cylinder group
+when a file exceeds 48 kilobytes,
+and at every megabyte thereafter.*
+.FS
+* The first spill over point at 48 kilobytes is the point
+at which a file on a 4096 byte block file system first
+requires a single indirect block. This appears to be
+a natural first point at which to redirect block allocation.
+The other spillover points are chosen with the intent of
+forcing block allocation to be redirected when a
+file has used about 25% of the data blocks in a cylinder group.
+In observing the new file system in day to day use, the heuristics appear
+to work well in minimizing the number of completely filled
+cylinder groups.
+.FE
+The newly chosen cylinder group is selected from those cylinder
+groups that have a greater than average number of free blocks left.
+Although big files tend to be spread out over the disk,
+a megabyte of data is typically accessible before
+a long seek must be performed,
+and the cost of one long seek per megabyte is small.
+.PP
+The global policy routines call local allocation routines with
+requests for specific blocks.
+The local allocation routines will
+always allocate the requested block
+if it is free; otherwise they
+allocate a free block of the requested size that is
+rotationally closest to the requested block.
+If the global layout policies had complete information,
+they could always request unused blocks and
+the allocation routines would be reduced to simple bookkeeping.
+However, maintaining complete information is costly;
+thus the implementation of the global layout policy
+uses heuristics that employ only partial information.
+.PP
+If a requested block is not available, the local allocator uses
+a four level allocation strategy:
+.IP 1)
+Use the next available block rotationally closest
+to the requested block on the same cylinder. It is assumed
+here that head switching time is zero. On disk
+controllers where this is not the case, it may be possible
+to incorporate the time required to switch between disk platters
+when constructing the rotational layout tables. This, however,
+has not yet been tried.
+.IP 2)
+If there are no blocks available on the same cylinder,
+use a block within the same cylinder group.
+.IP 3)
+If that cylinder group is entirely full,
+quadratically hash the cylinder group number to choose
+another cylinder group to look for a free block.
+.IP 4)
+Finally if the hash fails, apply an exhaustive search
+to all cylinder groups.
+.PP
+Quadratic hash is used because of its speed in finding
+unused slots in nearly full hash tables [Knuth75].
+File systems that are parameterized to maintain at least
+10% free space rarely use this strategy.
+File systems that are run without maintaining any free
+space typically have so few free blocks that almost any
+allocation is random;
+the most important characteristic of
+the strategy used under such conditions is that the strategy be fast.
+.ds RH Performance
+.sp 2
+.ne 1i
diff --git a/share/doc/smm/05.fastfs/4.t b/share/doc/smm/05.fastfs/4.t
new file mode 100644
index 0000000..15e3923
--- /dev/null
+++ b/share/doc/smm/05.fastfs/4.t
@@ -0,0 +1,252 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH Performance
+.NH
+Performance
+.PP
+Ultimately, the proof of the effectiveness of the
+algorithms described in the previous section
+is the long term performance of the new file system.
+.PP
+Our empirical studies have shown that the inode layout policy has
+been effective.
+When running the ``list directory'' command on a large directory
+that itself contains many directories (to force the system
+to access inodes in multiple cylinder groups),
+the number of disk accesses for inodes is cut by a factor of two.
+The improvements are even more dramatic for large directories
+containing only files,
+disk accesses for inodes being cut by a factor of eight.
+This is most encouraging for programs such as spooling daemons that
+access many small files,
+since these programs tend to flood the
+disk request queue on the old file system.
+.PP
+Table 2 summarizes the measured throughput of the new file system.
+Several comments need to be made about the conditions under which these
+tests were run.
+The test programs measure the rate at which user programs can transfer
+data to or from a file without performing any processing on it.
+These programs must read and write enough data to
+ensure that buffering in the
+operating system does not affect the results.
+They are also run at least three times in succession;
+the first to get the system into a known state
+and the second two to ensure that the
+experiment has stabilized and is repeatable.
+The tests used and their results are
+discussed in detail in [Kridle83]\(dg.
+.FS
+\(dg A UNIX command that is similar to the reading test that we used is
+``cp file /dev/null'', where ``file'' is eight megabytes long.
+.FE
+The systems were running multi-user but were otherwise quiescent.
+There was no contention for either the CPU or the disk arm.
+The only difference between the UNIBUS and MASSBUS tests
+was the controller.
+All tests used an AMPEX Capricorn 330 megabyte Winchester disk.
+As Table 2 shows, all file system test runs were on a VAX 11/750.
+All file systems had been in production use for at least
+a month before being measured.
+The same number of system calls were performed in all tests;
+the basic system call overhead was a negligible portion of
+the total running time of the tests.
+.KF
+.DS B
+.TS
+box;
+c c|c s s
+c c|c c c.
+Type of Processor and Read
+File System Bus Measured Speed Bandwidth % CPU
+_
+old 1024 750/UNIBUS 29 Kbytes/sec 29/983 3% 11%
+new 4096/1024 750/UNIBUS 221 Kbytes/sec 221/983 22% 43%
+new 8192/1024 750/UNIBUS 233 Kbytes/sec 233/983 24% 29%
+new 4096/1024 750/MASSBUS 466 Kbytes/sec 466/983 47% 73%
+new 8192/1024 750/MASSBUS 466 Kbytes/sec 466/983 47% 54%
+.TE
+.ce 1
+Table 2a \- Reading rates of the old and new UNIX file systems.
+.TS
+box;
+c c|c s s
+c c|c c c.
+Type of Processor and Write
+File System Bus Measured Speed Bandwidth % CPU
+_
+old 1024 750/UNIBUS 48 Kbytes/sec 48/983 5% 29%
+new 4096/1024 750/UNIBUS 142 Kbytes/sec 142/983 14% 43%
+new 8192/1024 750/UNIBUS 215 Kbytes/sec 215/983 22% 46%
+new 4096/1024 750/MASSBUS 323 Kbytes/sec 323/983 33% 94%
+new 8192/1024 750/MASSBUS 466 Kbytes/sec 466/983 47% 95%
+.TE
+.ce 1
+Table 2b \- Writing rates of the old and new UNIX file systems.
+.DE
+.KE
+.PP
+Unlike the old file system,
+the transfer rates for the new file system do not
+appear to change over time.
+The throughput rate is tied much more strongly to the
+amount of free space that is maintained.
+The measurements in Table 2 were based on a file system
+with a 10% free space reserve.
+Synthetic work loads suggest that throughput deteriorates
+to about half the rates given in Table 2 when the file
+systems are full.
+.PP
+The percentage of bandwidth given in Table 2 is a measure
+of the effective utilization of the disk by the file system.
+An upper bound on the transfer rate from the disk is calculated
+by multiplying the number of bytes on a track by the number
+of revolutions of the disk per second.
+The bandwidth is calculated by comparing the data rates
+the file system is able to achieve as a percentage of this rate.
+Using this metric, the old file system is only
+able to use about 3\-5% of the disk bandwidth,
+while the new file system uses up to 47%
+of the bandwidth.
+.PP
+Both reads and writes are faster in the new system than in the old system.
+The biggest factor in this speedup is the larger
+block size used by the new file system.
+The overhead of allocating blocks in the new system is greater
+than the overhead of allocating blocks in the old system;
+however, fewer blocks need to be allocated in the new system
+because they are bigger.
+The net effect is that the cost per byte allocated is about
+the same for both systems.
+.PP
+In the new file system, the reading rate is always at least
+as fast as the writing rate.
+This is to be expected since the kernel must do more work when
+allocating blocks than when simply reading them.
+Note that the write rates are about the same
+as the read rates in the 8192 byte block file system;
+the write rates are slower than the read rates in the 4096 byte block
+file system.
+The slower write rates occur because
+the kernel has to do twice as many disk allocations per second,
+making the processor unable to keep up with the disk transfer rate.
+.PP
+In contrast the old file system is about 50%
+faster at writing files than reading them.
+This is because the write system call is asynchronous and
+the kernel can generate disk transfer
+requests much faster than they can be serviced,
+hence disk transfers queue up in the disk buffer cache.
+Because the disk buffer cache is sorted by minimum seek distance,
+the average seek between the scheduled disk writes is much
+less than it would be if the data blocks were written out
+in the random disk order in which they are generated.
+However when the file is read,
+the read system call is processed synchronously so
+the disk blocks must be retrieved from the disk in the
+non-optimal seek order in which they are requested.
+This forces the disk scheduler to do long
+seeks resulting in a lower throughput rate.
+.PP
+In the new system the blocks of a file are more optimally
+ordered on the disk.
+Even though reads are still synchronous,
+the requests are presented to the disk in a much better order.
+Even though the writes are still asynchronous,
+they are already presented to the disk in minimum seek
+order so there is no gain to be had by reordering them.
+Hence the disk seek latencies that limited the old file system
+have little effect in the new file system.
+The cost of allocation is the factor in the new system that
+causes writes to be slower than reads.
+.PP
+The performance of the new file system is currently
+limited by memory to memory copy operations
+required to move data from disk buffers in the
+system's address space to data buffers in the user's
+address space. These copy operations account for
+about 40% of the time spent performing an input/output operation.
+If the buffers in both address spaces were properly aligned,
+this transfer could be performed without copying by
+using the VAX virtual memory management hardware.
+This would be especially desirable when transferring
+large amounts of data.
+We did not implement this because it would change the
+user interface to the file system in two major ways:
+user programs would be required to allocate buffers on page boundaries,
+and data would disappear from buffers after being written.
+.PP
+Greater disk throughput could be achieved by rewriting the disk drivers
+to chain together kernel buffers.
+This would allow contiguous disk blocks to be read
+in a single disk transaction.
+Many disks used with UNIX systems contain either
+32 or 48 sectors of 512 bytes per track.
+Each track holds exactly two or three 8192 byte file system blocks,
+or four or six 4096 byte file system blocks.
+The inability to use contiguous disk blocks
+effectively limits the performance
+on these disks to less than 50% of the available bandwidth.
+If the next block for a file cannot be laid out contiguously,
+then the minimum spacing to the next allocatable
+block on any platter is between a sixth and a half of a revolution.
+The implication of this is that the best possible layout without
+contiguous blocks uses only half of the bandwidth of any given track.
+If each track contains an odd number of sectors,
+then it is possible to resolve the rotational delay to any number of sectors
+by finding a block that begins at the desired
+rotational position on another track.
+The reason that block chaining has not been implemented is that it
+would require rewriting all the disk drivers in the system,
+and the current throughput rates are already limited by the
+speed of the available processors.
+.PP
+Currently only one block is allocated to a file at a time.
+A technique used by the DEMOS file system
+when it finds that a file is growing rapidly,
+is to preallocate several blocks at once,
+releasing them when the file is closed if they remain unused.
+By batching up allocations, the system can reduce the
+overhead of allocating at each write,
+and it can cut down on the number of disk writes needed to
+keep the block pointers on the disk
+synchronized with the block allocation [Powell79].
+This technique was not included because block allocation
+currently accounts for less than 10% of the time spent in
+a write system call and, once again, the
+current throughput rates are already limited by the speed
+of the available processors.
+.ds RH Functional enhancements
+.sp 2
+.ne 1i
diff --git a/share/doc/smm/05.fastfs/5.t b/share/doc/smm/05.fastfs/5.t
new file mode 100644
index 0000000..96d721a
--- /dev/null
+++ b/share/doc/smm/05.fastfs/5.t
@@ -0,0 +1,293 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)5.t 8.1 (Berkeley) 6/8/93
+.\"
+.ds RH Functional enhancements
+.NH
+File system functional enhancements
+.PP
+The performance enhancements to the
+UNIX file system did not require
+any changes to the semantics or
+data structures visible to application programs.
+However, several changes had been generally desired for some
+time but had not been introduced because they would require users to
+dump and restore all their file systems.
+Since the new file system already
+required all existing file systems to
+be dumped and restored,
+these functional enhancements were introduced at this time.
+.NH 2
+Long file names
+.PP
+File names can now be of nearly arbitrary length.
+Only programs that read directories are affected by this change.
+To promote portability to UNIX systems that
+are not running the new file system, a set of directory
+access routines have been introduced to provide a consistent
+interface to directories on both old and new systems.
+.PP
+Directories are allocated in 512 byte units called chunks.
+This size is chosen so that each allocation can be transferred
+to disk in a single operation.
+Chunks are broken up into variable length records termed
+directory entries. A directory entry
+contains the information necessary to map the name of a
+file to its associated inode.
+No directory entry is allowed to span multiple chunks.
+The first three fields of a directory entry are fixed length
+and contain: an inode number, the size of the entry, and the length
+of the file name contained in the entry.
+The remainder of an entry is variable length and contains
+a null terminated file name, padded to a 4 byte boundary.
+The maximum length of a file name in a directory is
+currently 255 characters.
+.PP
+Available space in a directory is recorded by having
+one or more entries accumulate the free space in their
+entry size fields. This results in directory entries
+that are larger than required to hold the
+entry name plus fixed length fields. Space allocated
+to a directory should always be completely accounted for
+by totaling up the sizes of its entries.
+When an entry is deleted from a directory,
+its space is returned to a previous entry
+in the same directory chunk by increasing the size of the
+previous entry by the size of the deleted entry.
+If the first entry of a directory chunk is free, then
+the entry's inode number is set to zero to indicate
+that it is unallocated.
+.NH 2
+File locking
+.PP
+The old file system had no provision for locking files.
+Processes that needed to synchronize the updates of a
+file had to use a separate ``lock'' file.
+A process would try to create a ``lock'' file.
+If the creation succeeded, then the process
+could proceed with its update;
+if the creation failed, then the process would wait and try again.
+This mechanism had three drawbacks.
+Processes consumed CPU time by looping over attempts to create locks.
+Locks left lying around because of system crashes had
+to be manually removed (normally in a system startup command script).
+Finally, processes running as system administrator
+are always permitted to create files,
+so were forced to use a different mechanism.
+While it is possible to get around all these problems,
+the solutions are not straightforward,
+so a mechanism for locking files has been added.
+.PP
+The most general schemes allow multiple processes
+to concurrently update a file.
+Several of these techniques are discussed in [Peterson83].
+A simpler technique is to serialize access to a file with locks.
+To attain reasonable efficiency,
+certain applications require the ability to lock pieces of a file.
+Locking down to the byte level has been implemented in the
+Onyx file system by [Bass81].
+However, for the standard system applications,
+a mechanism that locks at the granularity of a file is sufficient.
+.PP
+Locking schemes fall into two classes,
+those using hard locks and those using advisory locks.
+The primary difference between advisory locks and hard locks is the
+extent of enforcement.
+A hard lock is always enforced when a program tries to
+access a file;
+an advisory lock is only applied when it is requested by a program.
+Thus advisory locks are only effective when all programs accessing
+a file use the locking scheme.
+With hard locks there must be some override
+policy implemented in the kernel.
+With advisory locks the policy is left to the user programs.
+In the UNIX system, programs with system administrator
+privilege are allowed to override any protection scheme.
+Because many of the programs that need to use locks must
+also run as the system administrator,
+we chose to implement advisory locks rather than
+create an additional protection scheme that was inconsistent
+with the UNIX philosophy or could
+not be used by system administration programs.
+.PP
+The file locking facilities allow cooperating programs to apply
+advisory
+.I shared
+or
+.I exclusive
+locks on files.
+Only one process may have an exclusive
+lock on a file while multiple shared locks may be present.
+Shared and exclusive locks cannot both be present on
+a file at the same time.
+If any lock is requested when
+another process holds an exclusive lock,
+or an exclusive lock is requested when another process holds any lock,
+the lock request will block until the lock can be obtained.
+Because shared and exclusive locks are advisory only,
+even if a process has obtained a lock on a file,
+another process may access the file.
+.PP
+Locks are applied or removed only on open files.
+This means that locks can be manipulated without
+needing to close and reopen a file.
+This is useful, for example, when a process wishes
+to apply a shared lock, read some information
+and determine whether an update is required, then
+apply an exclusive lock and update the file.
+.PP
+A request for a lock will cause a process to block if the lock
+cannot be immediately obtained.
+In certain instances this is unsatisfactory.
+For example, a process that
+wants only to check if a lock is present would require a separate
+mechanism to find out this information.
+Consequently, a process may specify that its locking
+request should return with an error if a lock cannot be immediately
+obtained.
+Being able to conditionally request a lock
+is useful to ``daemon'' processes
+that wish to service a spooling area.
+If the first instance of the
+daemon locks the directory where spooling takes place,
+later daemon processes can
+easily check to see if an active daemon exists.
+Since locks exist only while the locking processes exist,
+lock files can never be left active after
+the processes exit or if the system crashes.
+.PP
+Almost no deadlock detection is attempted.
+The only deadlock detection done by the system is that the file
+to which a lock is applied must not already have a
+lock of the same type (i.e. the second of two successive calls
+to apply a lock of the same type will fail).
+.NH 2
+Symbolic links
+.PP
+The traditional UNIX file system allows multiple
+directory entries in the same file system
+to reference a single file. Each directory entry
+``links'' a file's name to an inode and its contents.
+The link concept is fundamental;
+inodes do not reside in directories, but exist separately and
+are referenced by links.
+When all the links to an inode are removed,
+the inode is deallocated.
+This style of referencing an inode does
+not allow references across physical file
+systems, nor does it support inter-machine linkage.
+To avoid these limitations
+.I "symbolic links"
+similar to the scheme used by Multics [Feiertag71] have been added.
+.PP
+A symbolic link is implemented as a file that contains a pathname.
+When the system encounters a symbolic link while
+interpreting a component of a pathname,
+the contents of the symbolic link is prepended to the rest
+of the pathname, and this name is interpreted to yield the
+resulting pathname.
+In UNIX, pathnames are specified relative to the root
+of the file system hierarchy, or relative to a process's
+current working directory. Pathnames specified relative
+to the root are called absolute pathnames. Pathnames
+specified relative to the current working directory are
+termed relative pathnames.
+If a symbolic link contains an absolute pathname,
+the absolute pathname is used,
+otherwise the contents of the symbolic link is evaluated
+relative to the location of the link in the file hierarchy.
+.PP
+Normally programs do not want to be aware that there is a
+symbolic link in a pathname that they are using.
+However certain system utilities
+must be able to detect and manipulate symbolic links.
+Three new system calls provide the ability to detect, read, and write
+symbolic links; seven system utilities required changes
+to use these calls.
+.PP
+In future Berkeley software distributions
+it may be possible to reference file systems located on
+remote machines using pathnames. When this occurs,
+it will be possible to create symbolic links that span machines.
+.NH 2
+Rename
+.PP
+Programs that create a new version of an existing
+file typically create the
+new version as a temporary file and then rename the temporary file
+with the name of the target file.
+In the old UNIX file system renaming required three calls to the system.
+If a program were interrupted or the system crashed between these calls,
+the target file could be left with only its temporary name.
+To eliminate this possibility the \fIrename\fP system call
+has been added. The rename call does the rename operation
+in a fashion that guarantees the existence of the target name.
+.PP
+Rename works both on data files and directories.
+When renaming directories,
+the system must do special validation checks to insure
+that the directory tree structure is not corrupted by the creation
+of loops or inaccessible directories.
+Such corruption would occur if a parent directory were moved
+into one of its descendants.
+The validation check requires tracing the descendants of the target
+directory to insure that it does not include the directory being moved.
+.NH 2
+Quotas
+.PP
+The UNIX system has traditionally attempted to share all available
+resources to the greatest extent possible.
+Thus any single user can allocate all the available space
+in the file system.
+In certain environments this is unacceptable.
+Consequently, a quota mechanism has been added for restricting the
+amount of file system resources that a user can obtain.
+The quota mechanism sets limits on both the number of inodes
+and the number of disk blocks that a user may allocate.
+A separate quota can be set for each user on each file system.
+Resources are given both a hard and a soft limit.
+When a program exceeds a soft limit,
+a warning is printed on the user's terminal;
+the offending program is not terminated
+unless it exceeds its hard limit.
+The idea is that users should stay below their soft limit between
+login sessions,
+but they may use more resources while they are actively working.
+To encourage this behavior,
+users are warned when logging in if they are over
+any of their soft limits.
+If users fail to correct the problem for too many login sessions,
+they are eventually reprimanded by having their soft limit
+enforced as their hard limit.
+.ds RH Acknowledgements
+.sp 2
+.ne 1i
diff --git a/share/doc/smm/05.fastfs/6.t b/share/doc/smm/05.fastfs/6.t
new file mode 100644
index 0000000..40be6aa
--- /dev/null
+++ b/share/doc/smm/05.fastfs/6.t
@@ -0,0 +1,159 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)6.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.ds RH Acknowledgements
+.SH
+\s+2Acknowledgements\s0
+.PP
+We thank Robert Elz for his ongoing interest in the new file system,
+and for adding disk quotas in a rational and efficient manner.
+We also acknowledge Dennis Ritchie for his suggestions
+on the appropriate modifications to the user interface.
+We appreciate Michael Powell's explanations on how
+the DEMOS file system worked;
+many of his ideas were used in this implementation.
+Special commendation goes to Peter Kessler and Robert Henry for acting
+like real users during the early debugging stage when file systems were
+less stable than they should have been.
+The criticisms and suggestions by the reviewers contributed significantly
+to the coherence of the paper.
+Finally we thank our sponsors,
+the National Science Foundation under grant MCS80-05144,
+and the Defense Advanced Research Projects Agency (DoD) under
+ARPA Order No. 4031 monitored by Naval Electronic System Command under
+Contract No. N00039-82-C-0235.
+.ds RH References
+.nr H2 1
+.sp 2
+.SH
+\s+2References\s0
+.LP
+.IP [Almes78] 20
+Almes, G., and Robertson, G.
+"An Extensible File System for Hydra"
+Proceedings of the Third International Conference on Software Engineering,
+IEEE, May 1978.
+.IP [Bass81] 20
+Bass, J.
+"Implementation Description for File Locking",
+Onyx Systems Inc, 73 E. Trimble Rd, San Jose, CA 95131
+Jan 1981.
+.IP [Feiertag71] 20
+Feiertag, R. J. and Organick, E. I.,
+"The Multics Input-Output System",
+Proceedings of the Third Symposium on Operating Systems Principles,
+ACM, Oct 1971. pp 35-41
+.IP [Ferrin82a] 20
+Ferrin, T.E.,
+"Performance and Robustness Improvements in Version 7 UNIX",
+Computer Graphics Laboratory Technical Report 2,
+School of Pharmacy, University of California,
+San Francisco, January 1982.
+Presented at the 1982 Winter Usenix Conference, Santa Monica, California.
+.IP [Ferrin82b] 20
+Ferrin, T.E.,
+"Performance Issues of VMUNIX Revisited",
+;login: (The Usenix Association Newsletter), Vol 7, #5, November 1982. pp 3-6
+.IP [Kridle83] 20
+Kridle, R., and McKusick, M.,
+"Performance Effects of Disk Subsystem Choices for
+VAX Systems Running 4.2BSD UNIX",
+Computer Systems Research Group, Dept of EECS, Berkeley, CA 94720,
+Technical Report #8.
+.IP [Kowalski78] 20
+Kowalski, T.
+"FSCK - The UNIX System Check Program",
+Bell Laboratory, Murray Hill, NJ 07974. March 1978
+.IP [Knuth75] 20
+Knuth, D.
+"The Art of Computer Programming",
+Volume 3 - Sorting and Searching,
+Addison-Wesley Publishing Company Inc, Reading, Mass, 1975. pp 506-549
+.IP [Maruyama76] 20
+Maruyama, K., and Smith, S.
+"Optimal reorganization of Distributed Space Disk Files",
+CACM, 19, 11. Nov 1976. pp 634-642
+.IP [Nevalainen77] 20
+Nevalainen, O., Vesterinen, M.
+"Determining Blocking Factors for Sequential Files by Heuristic Methods",
+The Computer Journal, 20, 3. Aug 1977. pp 245-247
+.IP [Pechura83] 20
+Pechura, M., and Schoeffler, J.
+"Estimating File Access Time of Floppy Disks",
+CACM, 26, 10. Oct 1983. pp 754-763
+.IP [Peterson83] 20
+Peterson, G.
+"Concurrent Reading While Writing",
+ACM Transactions on Programming Languages and Systems,
+ACM, 5, 1. Jan 1983. pp 46-55
+.IP [Powell79] 20
+Powell, M.
+"The DEMOS File System",
+Proceedings of the Sixth Symposium on Operating Systems Principles,
+ACM, Nov 1977. pp 33-42
+.IP [Ritchie74] 20
+Ritchie, D. M. and Thompson, K.,
+"The UNIX Time-Sharing System",
+CACM 17, 7. July 1974. pp 365-375
+.IP [Smith81a] 20
+Smith, A.
+"Input/Output Optimization and Disk Architectures: A Survey",
+Performance and Evaluation 1. Jan 1981. pp 104-117
+.IP [Smith81b] 20
+Smith, A.
+"Bibliography on File and I/O System Optimization and Related Topics",
+Operating Systems Review, 15, 4. Oct 1981. pp 39-54
+.IP [Symbolics81] 20
+"Symbolics File System",
+Symbolics Inc, 9600 DeSoto Ave, Chatsworth, CA 91311
+Aug 1981.
+.IP [Thompson78] 20
+Thompson, K.
+"UNIX Implementation",
+Bell System Technical Journal, 57, 6, part 2. pp 1931-1946
+July-August 1978.
+.IP [Thompson80] 20
+Thompson, M.
+"Spice File System",
+Carnegie-Mellon University,
+Department of Computer Science, Pittsburgh, PA 15213
+#CMU-CS-80, Sept 1980.
+.IP [Trivedi80] 20
+Trivedi, K.
+"Optimal Selection of CPU Speed, Device Capabilities, and File Assignments",
+Journal of the ACM, 27, 3. July 1980. pp 457-473
+.IP [White80] 20
+White, R. M.
+"Disk Storage Technology",
+Scientific American, 243(2), August 1980.
diff --git a/share/doc/smm/05.fastfs/Makefile b/share/doc/smm/05.fastfs/Makefile
new file mode 100644
index 0000000..059887e
--- /dev/null
+++ b/share/doc/smm/05.fastfs/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= smm/05.fastfs
+SRCS= 0.t 1.t 2.t 3.t 4.t 5.t 6.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${EQN} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/smm/06.nfs/0.t b/share/doc/smm/06.nfs/0.t
new file mode 100644
index 0000000..4d77f56
--- /dev/null
+++ b/share/doc/smm/06.nfs/0.t
@@ -0,0 +1,75 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This document is derived from software contributed to Berkeley by
+.\" Rick Macklem at The University of Guelph.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 6/8/93
+.\"
+.(l C
+.sz 14
+.b "The 4.4BSD NFS Implementation"
+.sp
+.sz 10
+Rick Macklem
+.i "University of Guelph"
+.)l
+.sp 2
+.ce 1
+.sz 12
+.b "ABSTRACT"
+.eh 'SMM:06-%''The 4.4BSD NFS Implementation'
+.oh 'The 4.4BSD NFS Implementation''SMM:06-%'
+.pp
+The 4.4BSD implementation of the Network File System (NFS)\** is
+intended to interoperate with
+.(f
+\**Network File System (NFS) is believed to be a registered trademark of
+Sun Microsystems Inc.
+.)f
+other NFS Version 2 Protocol (RFC1094) implementations but also
+allows use of an alternate protocol that is hoped to provide better
+performance in certain environments.
+This paper will informally discuss these various protocol features and
+their use.
+There is a brief overview of the implementation followed
+by several sections on various problem areas related to NFS
+and some hints on how to deal with them.
+.pp
+Not Quite NFS (NQNFS) is an NFS-like protocol designed to maintain full cache
+consistency between clients in a crash tolerant manner. It is an adaptation
+of the NFS protocol such that the server supports both NFS
+and NQNFS clients while maintaining full consistency between the server and
+NQNFS clients.
+It borrows heavily from work done on Spritely-NFS [Srinivasan89], but uses
+Leases [Gray89] to avoid the need to recover server state information
+after a crash.
+.sp
diff --git a/share/doc/smm/06.nfs/1.t b/share/doc/smm/06.nfs/1.t
new file mode 100644
index 0000000..6804ed1
--- /dev/null
+++ b/share/doc/smm/06.nfs/1.t
@@ -0,0 +1,588 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This document is derived from software contributed to Berkeley by
+.\" Rick Macklem at The University of Guelph.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh 1 "NFS Implementation"
+.pp
+The 4.4BSD implementation of NFS and the alternate protocol nicknamed
+Not Quite NFS (NQNFS) are kernel resident, but make use of a few system
+daemons.
+The kernel implementation does not use an RPC library, handling the RPC
+request and reply messages directly in \fImbuf\fR data areas. NFS
+interfaces to the network using
+sockets via the kernel interface available in
+\fIsys/kern/uipc_syscalls.c\fR as \fIsosend(), soreceive(),\fR...
+There are connection management routines for support of sockets for connection
+oriented protocols and timeout/retransmit support for datagram sockets on
+the client side.
+For connection oriented transport protocols,
+such as TCP/IP, there is one connection
+for each client to server mount point that is maintained until an umount.
+If the connection breaks, the client will attempt a reconnect with a new
+socket.
+The client side can operate without any daemons running, but performance
+will be improved by running nfsiod daemons that perform read-aheads
+and write-behinds.
+For the server side to function, the daemons portmap, mountd and
+nfsd must be running.
+The mountd daemon performs two important functions.
+.ip 1)
+Upon startup and after a hangup signal, mountd reads the exports
+file and pushes the export information for each local file system down
+into the kernel via the mount system call.
+.ip 2)
+Mountd handles remote mount protocol (RFC1094, Appendix A) requests.
+.lp
+The nfsd master daemon forks off children that enter the kernel
+via the nfssvc system call. The children normally remain kernel
+resident, providing a process context for the NFS RPC servers. The only
+exception to this is when a Kerberos [Steiner88]
+ticket is received and at that time
+the nfsd exits the kernel temporarily to verify the ticket via the
+Kerberos libraries and then returns to the kernel with the results.
+(This only happens for Kerberos mount points as described further under
+Security.)
+Meanwhile, the master nfsd waits to accept new connections from clients
+using connection oriented transport protocols and passes the new sockets down
+into the kernel.
+The client side mount_nfs along with portmap and
+mountd are the only parts of the NFS subsystem that make any
+use of the Sun RPC library.
+.sh 1 "Mount Problems"
+.pp
+There are several problems that can be encountered at the time of an NFS
+mount, ranging from an unresponsive NFS server (crashed, network partitioned
+from client, etc.) to various interoperability problems between different
+NFS implementations.
+.pp
+On the server side,
+if the 4.4BSD NFS server will be handling any PC clients, mountd will
+require the \fB-n\fR option to enable non-root mount request servicing.
+Running of a pcnfsd\** daemon will also be necessary.
+.(f
+\** Pcnfsd is available in source form from Sun Microsystems and many
+anonymous ftp sites.
+.)f
+The server side requires that the daemons
+mountd and nfsd be running and that
+they be registered with portmap properly.
+If problems are encountered,
+the safest fix is to kill all the daemons and then restart them in
+the order portmap, mountd and nfsd.
+Other server side problems are normally caused by problems with the format
+of the exports file, which is covered under
+Security and in the exports man page.
+.pp
+On the client side, there are several mount options useful for dealing
+with server problems.
+In cases where a file system is not critical for system operation, the
+\fB-b\fR
+mount option may be specified so that mount_nfs will go into the
+background for a mount attempt on an unresponsive server.
+This is useful for mounts specified in
+\fIfstab(5)\fR,
+so that the system will not get hung while booting doing
+\fBmount -a\fR
+because a file server is not responsive.
+On the other hand, if the file system is critical to system operation, this
+option should not be used so that the client will wait for the server to
+come up before completing bootstrapping.
+There are also three mount options to help deal with interoperability issues
+with various non-BSD NFS servers. The
+\fB-P\fR
+option specifies that the NFS
+client use a reserved IP port number to satisfy some servers' security
+requirements.\**
+.(f
+\**Any security benefit of this is highly questionable and as
+such the BSD server does not require a client to use a reserved port number.
+.)f
+The
+\fB-c\fR
+option stops the NFS client from doing a \fIconnect\fR on the UDP
+socket, so that the mount works with servers that send NFS replies from
+port numbers other than the standard 2049.\**
+.(f
+\**The Encore Multimax is known
+to require this.
+.)f
+Finally, the
+\fB-g=\fInum\fR
+option sets the maximum size of the group list in the credentials passed
+to an NFS server in every RPC request. Although RFC1057 specifies a maximum
+size of 16 for the group list, some servers can't handle that many.
+If a user, particularly root doing a mount,
+keeps getting access denied from a file server, try temporarily
+reducing the number of groups that user is in to less than 5
+by editing /etc/group. If the user can then access the file system, slowly
+increase the number of groups for that user until the limit is found and
+then peg the limit there with the
+\fB-g=\fInum\fR
+option.
+This implies that the server will only see the first \fInum\fR
+groups that the user is in, which can cause some accessibility problems.
+.pp
+For sites that have many NFS servers, amd [Pendry93]
+is a useful administration tool.
+It also reduces the number of actual NFS mount points, alleviating problems
+with commands such as df(1) that hang when any of the NFS servers is
+unreachable.
+.sh 1 "Dealing with Hung Servers"
+.pp
+There are several mount options available to help a client deal with
+being hung waiting for response from a crashed or unreachable\** server.
+.(f
+\**Due to a network partitioning or similar.
+.)f
+By default, a hard mount will continue to try to contact the server
+``forever'' to complete the system call. This type of mount is appropriate
+when processes on the client that access files in the file system do not
+tolerate file I/O system calls that return -1 with \fIerrno == EINTR\fR
+and/or access to the file system is critical for normal system operation.
+.lp
+There are two other alternatives:
+.ip 1)
+A soft mount (\fB-s\fR option) retries an RPC \fIn\fR
+times and then the corresponding
+system call returns -1 with errno set to EINTR.
+For TCP transport, the actual RPC request is not retransmitted, but the
+timeout intervals waiting for a reply from the server are done
+in the same manner as UDP for this purpose.
+The problem with this type of mount is that most applications do not
+expect an EINTR error return from file I/O system calls (since it never
+occurs for a local file system) and get confused by the error return
+from the I/O system call.
+The option
+\fB-x=\fInum\fR
+is used to set the RPC retry limit and if set too low, the error returns
+will start occurring whenever the NFS server is slow due to heavy load.
+Alternately, a large retry limit can result in a process hung for a long
+time, due to a crashed server or network partitioning.
+.ip 2)
+An interruptible mount (\fB-i\fR option) checks to see if a termination signal
+is pending for the process when waiting for server response and if it is,
+the I/O system call posts an EINTR. Normally this results in the process
+being terminated by the signal when returning from the system call.
+This feature allows you to ``^C'' out of processes that are hung
+due to unresponsive servers.
+The problem with this approach is that signals that are caught by
+a process are not recognized as termination signals
+and the process will remain hung.\**
+.(f
+\**Unfortunately, there are also some resource allocation situations in the
+BSD kernel where the termination signal will be ignored and the process
+will not terminate.
+.)f
+.sh 1 "RPC Transport Issues"
+.pp
+The NFS Version 2 protocol runs over UDP/IP transport by
+sending each Sun Remote Procedure Call (RFC1057)
+request/reply message in a single UDP
+datagram. Since UDP does not guarantee datagram delivery, the
+Remote Procedure Call (RPC) layer
+times out and retransmits an RPC request if
+no RPC reply has been received. Since this round trip timeout (RTO) value
+is for the entire RPC operation, including RPC message transmission to the
+server, queuing at the server for an nfsd, performing the RPC and
+sending the RPC reply message back to the client, it can be highly variable
+for even a moderately loaded NFS server.
+As a result, the RTO interval must be a conservative (large) estimate, in
+order to avoid extraneous RPC request retransmits.\**
+.(f
+\**At best, an extraneous RPC request retransmit increases
+the load on the server and at worst can result in damaged files
+on the server when non-idempotent RPCs are redone [Juszczak89].
+.)f
+Also, with an 8Kbyte read/write data size
+(the default), the read/write reply/request will be an 8+Kbyte UDP datagram
+that must normally be fragmented at the IP layer for transmission.\**
+.(f
+\**6 IP fragments for an Ethernet,
+which has a maximum transmission unit of 1500 bytes.
+.)f
+For IP fragments to be successfully reassembled into
+the IP datagram at the receive end, all
+fragments must be received within a fairly short ``time to live''.
+If one fragment is lost/damaged in transit,
+the entire RPC must be retransmitted and redone.
+This problem can be exacerbated by a network interface on the receiver that
+cannot handle the reception of back to back network packets. [Kent87a]
+.pp
+There are several tuning mount
+options on the client side that can prove useful when trying to
+alleviate performance problems related to UDP RPC transport.
+The options
+\fB-r=\fInum\fR
+and
+\fB-w=\fInum\fR
+specify the maximum read or write data size respectively.
+The size \fInum\fR
+should be a power of 2 (4K, 2K, 1K) and adjusted downward from the
+maximum of 8Kbytes
+whenever IP fragmentation is causing problems. The best indicator of
+IP fragmentation problems is a significant number of
+\fIfragments dropped after timeout\fR
+reported by the \fIip:\fR section of a \fBnetstat -s\fR
+command on either the client or server.
+Of course, if the fragments are being dropped at the server, it can be
+fun figuring out which client(s) are involved.
+The most likely candidates are clients that are not
+on the same local area network as the
+server or have network interfaces that do not receive several
+back to back network packets properly.
+.pp
+By default, the 4.4BSD NFS client dynamically estimates the retransmit
+timeout interval for the RPC and this appears to work reasonably well for
+many environments. However, the
+\fB-d\fR
+flag can be specified to turn off
+the dynamic estimation of retransmit timeout, so that the client will
+use a static initial timeout interval.\**
+.(f
+\**After the first retransmit timeout, the initial interval is backed off
+exponentially.
+.)f
+The
+\fB-t=\fInum\fR
+option can be used with
+\fB-d\fR
+to set the initial timeout interval to other than the default of 2 seconds.
+The best indicator that dynamic estimation should be turned off would
+be a significant number\** in the \fIX Replies\fR field and a
+.(f
+\**Even 0.1% of the total RPCs is probably significant.
+.)f
+large number in the \fIRetries\fR field
+in the \fIRpc Info:\fR section as reported
+by the \fBnfsstat\fR command.
+On the server, there would be significant numbers of \fIInprog\fR recent
+request cache hits in the \fIServer Cache Stats:\fR section as reported
+by the \fBnfsstat\fR command, when run on the server.
+.pp
+The tradeoff is that a smaller timeout interval results in a better
+average RPC response time, but increases the risk of extraneous retries
+that in turn increase server load and the possibility of damaged files
+on the server. It is probably best to err on the safe side and use a large
+(>= 2sec) fixed timeout if the dynamic retransmit timeout estimation
+seems to be causing problems.
+.pp
+An alternative to all this fiddling is to run NFS over TCP transport instead
+of UDP.
+Since the 4.4BSD TCP implementation provides reliable
+delivery with congestion control, it avoids all of the above problems.
+It also permits the use of read and write data sizes greater than the 8Kbyte
+limit for UDP transport.\**
+.(f
+\**Read/write data sizes greater than 8Kbytes will not normally improve
+performance unless the kernel constant MAXBSIZE is increased and the
+file system on the server has a block size greater than 8Kbytes.
+.)f
+NFS over TCP usually delivers comparable to significantly better performance
+than NFS over UDP
+unless the client or server processor runs at less than 5-10MIPS. For a
+slow processor, the extra CPU overhead of using TCP transport will become
+significant and TCP transport may only be useful when the client
+to server interconnect traverses congested gateways.
+The main problem with using TCP transport is that it is only supported
+between BSD clients and servers.\**
+.(f
+\**There are rumors of commercial NFS over TCP implementations on the horizon
+and these may well be worth exploring.
+.)f
+.sh 1 "Other Tuning Tricks"
+.pp
+Another mount option that may improve performance over
+certain network interconnects is \fB-a=\fInum\fR
+which sets the number of blocks that the system will
+attempt to read-ahead during sequential reading of a file. The default value
+of 1 seems to be appropriate for most situations, but a larger value might
+achieve better performance for some environments, such as a mount to a server
+across a ``high bandwidth * round trip delay'' interconnect.
+.pp
+For the adventurous, playing with the size of the buffer cache
+can also improve performance for some environments that use NFS heavily.
+Under some workloads, a buffer cache of 4-6Mbytes can result in significant
+performance improvements over 1-2Mbytes, both in client side system call
+response time and reduced server RPC load.
+The buffer cache size defaults to 10% of physical memory,
+but this can be overridden by specifying the BUFPAGES option
+in the machine's config file.\**
+.(f
+\**BUFPAGES is the number of physical machine pages allocated to the buffer cache.
+i.e. BUFPAGES * NBPG = buffer cache size in bytes
+.)f
+When increasing the size of BUFPAGES, it is also advisable to increase the
+number of buffers NBUF by a corresponding amount.
+Note that there is a tradeoff of memory allocated to the buffer cache versus
+available for paging, which implies that making the buffer cache larger
+will increase paging rate, with possibly disastrous results.
+.sh 1 "Security Issues"
+.pp
+When a machine is running an NFS server it opens up a great big security hole.
+For ordinary NFS, the server receives client credentials
+in the RPC request as a user id
+and a list of group ids and trusts them to be authentic!
+The only tool available for restricting remote access to
+file systems is the exports(5) file,
+so file systems should be exported with great care.
+The exports file is read by mountd upon startup and after a hangup signal
+is posted for it and then as much of the access specifications as possible are
+pushed down into the kernel for use by the nfsd(s).
+The trick here is that the kernel information is stored on a per
+local file system mount point and client host address basis and cannot refer to
+individual directories within the local server file system.
+It is best to think of the exports file as referring to the various local
+file systems and not just directory paths as mount points.
+A local file system may be exported to a specific host, all hosts that
+match a subnet mask or all other hosts (the world). The latter is very
+dangerous and should only be used for public information. It is also
+strongly recommended that file systems exported to ``the world'' be exported
+read-only.
+For each host or group of hosts, the file system can be exported read-only or
+read/write.
+You can also define one of three client user id to server credential
+mappings to help control access.
+Root (user id == 0) can be mapped to some default credentials while all other
+user ids are accepted as given.
+If the default credentials for user id equal zero
+are root, then there is essentially no remapping.
+Most NFS file systems are exported this way, most commonly mapping
+user id == 0 to the credentials for the user nobody.
+Since the client user id and group id list is used unchanged on the server
+(except for root), this also implies that
+the user id and group id space must be common between the client and server.
+(ie. user id N on the client must refer to the same user on the server)
+All user ids can be mapped to a default set of credentials, typically that of
+the user nobody. This essentially gives world access to all
+users on the corresponding hosts.
+.pp
+There is also a non-standard BSD
+\fB-kerb\fR export option that requires the client provide
+a KerberosIV rcmd service ticket to authenticate the user on the server.
+If successful, the Kerberos principal is looked up in the server's password
+and group databases to get a set of credentials and a map of client userid to
+these credentials is then cached.
+The use of TCP transport is strongly recommended,
+since the scheme depends on the TCP connection to avert replay attempts.
+Unfortunately, this option is only usable
+between BSD clients and servers since it is
+not compatible with other known ``kerberized'' NFS systems.
+To enable use of this Kerberos option, both mount_nfs on the client and
+nfsd on the server must be rebuilt with the -DKERBEROS option and
+linked to KerberosIV libraries.
+The file system is then exported to the client(s) with the \fB-kerb\fR option
+in the exports file on the server
+and the client mount specifies the
+\fB-K\fR
+and
+\fB-T\fR
+options.
+The
+\fB-m=\fIrealm\fR
+mount option may be used to specify a Kerberos Realm for the ticket
+(it must be the Kerberos Realm of the server) that is other than
+the client's local Realm.
+To access files in a \fB-kerb\fR mount point, the user must have a valid
+TGT for the server's Realm, as provided by kinit or similar.
+.pp
+As well as the standard NFS Version 2 protocol (RFC1094) implementation, BSD
+systems can use a variant of the protocol called Not Quite NFS (NQNFS) that
+supports a variety of protocol extensions.
+This protocol uses 64bit file offsets
+and sizes, an \fIaccess rpc\fR, an \fIappend\fR option on the write rpc
+and extended file attributes to support 4.4BSD file system functionality
+more fully.
+It also makes use of a variant of short term
+\fIleases\fR [Gray89] with delayed write client caching,
+in an effort to provide full cache consistency and better performance.
+This protocol is available between 4.4BSD systems only and is used when
+the \fB-q\fR mount option is specified.
+It can be used with any of the aforementioned options for NFS, such as TCP
+transport (\fB-T\fR) and KerberosIV authentication (\fB-K\fR).
+Although this protocol is experimental, it is recommended over NFS for
+mounts between 4.4BSD systems.\**
+.(f
+\**I would appreciate email from anyone who can provide
+NFS vs. NQNFS performance measurements,
+particularly for fast clients, many clients or over an internetwork
+connection with a large ``bandwidth * RTT'' product.
+.)f
+.sh 1 "Monitoring NFS Activity"
+.pp
+The basic command for monitoring NFS activity on clients and servers is
+nfsstat. It reports cumulative statistics of various NFS activities,
+such as counts of the various different RPCs and cache hit rates on the client
+and server. Of particular interest on the server are the fields in the
+\fIServer Cache Stats:\fR section, which gives numbers for RPC retries received
+in the first three fields and total RPCs in the fourth. The first three fields
+should remain a very small percentage of the total. If not, it
+would indicate one or more clients doing retries too aggressively and the fix
+would be to isolate these clients,
+disable the dynamic RTO estimation on them and
+make their initial timeout interval a conservative (ie. large) value.
+.pp
+On the client side, the fields in the \fIRpc Info:\fR section are of particular
+interest, as they give an overall picture of NFS activity.
+The \fITimedOut\fR field is the number of I/O system calls that returned -1
+for ``soft'' mounts and can be reduced
+by increasing the retry limit or changing
+the mount type to ``intr'' or ``hard''.
+The \fIInvalid\fR field is a count of trashed RPC replies that are received
+and should remain zero.\**
+.(f
+\**Some NFS implementations run with UDP checksums disabled, so garbage RPC
+messages can be received.
+.)f
+The \fIX Replies\fR field counts the number of repeated RPC replies received
+from the server and is a clear indication of a too aggressive RTO estimate.
+Unfortunately, a good NFS server implementation will use a ``recent request
+cache'' [Juszczak89] that will suppress the extraneous replies.
+A large value for \fIRetries\fR indicates a problem, but
+it could be any of:
+.ip \(bu
+a too aggressive RTO estimate
+.ip \(bu
+an overloaded NFS server
+.ip \(bu
+IP fragments being dropped (gateway, client or server)
+.lp
+and requires further investigation.
+The \fIRequests\fR field is the total count of RPCs done on all servers.
+.pp
+The \fBnetstat -s\fR command comes in useful during investigation of RPC transport
+problems.
+The field \fIfragments dropped after timeout\fR in
+the \fIip:\fR section indicates IP fragments are
+being lost and a significant number of these occurring indicates that the
+use of TCP transport or a smaller read/write data size is in order.
+A significant number of \fIbad checksums\fR reported in the \fIudp:\fR
+section would suggest network problems of a more generic sort
+(cabling, transceiver or network hardware interface problems or similar).
+.pp
+There is a RPC activity logging facility for both the client and
+server side in the kernel.
+When logging is enabled by setting the kernel variable nfsrtton to
+one, the logs in the kernel structures nfsrtt (for the client side)
+and nfsdrt (for the server side) are updated upon the completion
+of each RPC in a circular manner.
+The pos element of the structure is the index of the next element
+of the log array to be updated.
+In other words, elements of the log array from \fIlog\fR[pos] to
+\fIlog\fR[pos - 1] are in chronological order.
+The include file <sys/nfsrtt.h> should be consulted for details on the
+fields in the two log structures.\**
+.(f
+\**Unfortunately, a monitoring tool that uses these logs is still in the
+planning (dreaming) stage.
+.)f
+.sh 1 "Diskless Client Support"
+.pp
+The NFS client does include kernel support for diskless/dataless operation
+where the root file system and optionally the swap area is remote NFS mounted.
+A diskless/dataless client is configured using a version of the
+``swapvmunix.c'' file as provided in the directory \fIcontrib/diskless.nfs\fR.
+If the swap device == NODEV, it specifies an NFS mounted swap area and should
+be configured the same size as set up by diskless_setup when run on the server.
+This file must be put in the \fIsys/compile/<machine_name>\fR kernel build
+directory after the config command has been run, since config does
+not know about specifying NFS root and swap areas.
+The kernel variable mountroot must be set to nfs_mountroot instead of
+ffs_mountroot and the kernel structure nfs_diskless must be filled in
+properly.
+There are some primitive system administration tools in the \fIcontrib/diskless.nfs\fR directory to assist in filling in
+the nfs_diskless structure and in setting up an NFS server for
+diskless/dataless clients.
+The tools were designed to provide a bare bones capability, to allow maximum
+flexibility when setting up different servers.
+.lp
+The tools are as follows:
+.ip \(bu
+diskless_offset.c - This little program reads a ``vmunix'' object file and
+writes the file byte offset of the nfs_diskless structure in it to
+standard out. It was kept separate because it sometimes has to
+be compiled/linked in funny ways depending on the client architecture.
+(See the comment at the beginning of it.)
+.ip \(bu
+diskless_setup.c - This program is run on the server and sets up files for a
+given client. It mostly just fills in an nfs_diskless structure and
+writes it out to either the "vmunix" file or a separate file called
+/var/diskless/setup.<official-hostname>
+.ip \(bu
+diskless_boot.c - There are two functions in here that may be used
+by a bootstrap server such as tftpd to permit sharing of the ``vmunix''
+object file for similar clients. This saves disk space on the bootstrap
+server and simplifies organization, but is not critical for correct operation.
+They read the ``vmunix''
+file, but optionally fill in the nfs_diskless structure from a
+separate "setup.<official-hostname>" file so that there is only
+one copy of "vmunix" for all similar (same arch etc.) clients.
+These functions use a text file called
+/var/diskless/boot.<official-hostname> to control the netboot.
+.lp
+The basic setup steps are:
+.ip \(bu
+make a "vmunix" for the client(s) with mountroot() == nfs_mountroot()
+and swdevt[0].sw_dev == NODEV if it is to do nfs swapping as well
+(See the sample swapvmunix.c file)
+.ip \(bu
+run diskless_offset on the vmunix file to find out the byte offset
+of the nfs_diskless structure
+.ip \(bu
+Run diskless_setup on the server to set up the server and fill in the
+nfs_diskless structure for that client.
+The nfs_diskless structure can either be written into the
+vmunix file (the -x option) or
+saved in /var/diskless/setup.<official-hostname>.
+.ip \(bu
+Set up the bootstrap server. If the nfs_diskless structure was written into
+the ``vmunix'' file, any vanilla bootstrap protocol such as bootp/tftp can
+be used. If the bootstrap server has been modified to use the functions in
+diskless_boot.c, then a
+file called /var/diskless/boot.<official-hostname>
+must be created.
+It is simply a two line text file, where the first line is the pathname
+of the correct ``vmunix'' file and the second line has the pathname of
+the nfs_diskless structure file and its byte offset in it.
+For example:
+.br
+ /var/diskless/vmunix.pmax
+.br
+ /var/diskless/setup.rickers.cis.uoguelph.ca 642308
+.br
+.ip \(bu
+Create a /var subtree for each client in an appropriate place on the server,
+such as /var/diskless/var/<client-hostname>/...
+By using the <client-hostname> to differentiate /var for each host,
+/etc/rc can be modified to mount the correct /var from the server.
diff --git a/share/doc/smm/06.nfs/2.t b/share/doc/smm/06.nfs/2.t
new file mode 100644
index 0000000..841dd5f
--- /dev/null
+++ b/share/doc/smm/06.nfs/2.t
@@ -0,0 +1,530 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This document is derived from software contributed to Berkeley by
+.\" Rick Macklem at The University of Guelph.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh 1 "Not Quite NFS, Crash Tolerant Cache Consistency for NFS"
+.pp
+Not Quite NFS (NQNFS) is an NFS like protocol designed to maintain full cache
+consistency between clients in a crash tolerant manner.
+It is an adaptation of the NFS protocol such that the server supports both NFS
+and NQNFS clients while maintaining full consistency between the server and
+NQNFS clients.
+This section borrows heavily from work done on Spritely-NFS [Srinivasan89],
+but uses Leases [Gray89] to avoid the need to recover server state information
+after a crash.
+The reader is strongly encouraged to read these references before
+trying to grasp the material presented here.
+.sh 2 "Overview"
+.pp
+The protocol maintains cache consistency by using a somewhat
+Sprite [Nelson88] like protocol,
+but is based on short term leases\** instead of hard state information
+about open files.
+.(f
+\** A lease is a ticket permitting an activity that is
+valid until some expiry time.
+.)f
+The basic principle is that the protocol will disable client caching of a
+file whenever that file is write shared\**.
+.(f
+\** Write sharing occurs when at least one client is modifying a file while
+other client(s) are reading the file.
+.)f
+Whenever a client wishes to cache data for a file it must hold a valid lease.
+There are three types of leases: read caching, write caching and non-caching.
+The latter type requires that all file operations be done synchronously with
+the server via RPCs.
+A read caching lease allows for client data caching, but no file modifications
+may be done.
+A write caching lease allows for client caching of writes,
+but requires that all writes be pushed to the server when the lease expires.
+If a client has dirty buffers\**
+.(f
+\** Cached write data is not yet pushed (written) to the server.
+.)f
+when a write cache lease has almost expired, it will attempt to
+extend the lease but is required to push the dirty buffers if extension fails.
+A client gets leases by either doing a \fBGetLease RPC\fR or by piggybacking
+a \fBGetLease Request\fR onto another RPC. Piggybacking is supported for the
+frequent RPCs Getattr, Setattr, Lookup, Readlink, Read, Write and Readdir
+in an effort to minimize the number of \fBGetLease RPCs\fR required.
+All leases are at the granularity of a file, since all NFS RPCs operate on
+individual files and NFS has no intrinsic notion of a file hierarchy.
+Directories, symbolic links and file attributes may be read cached but
+are not write cached.
+The exception here is the attribute file_size, which is updated during cached
+writing on the client to reflect a growing file.
+.pp
+It is the server's responsibility to ensure that consistency is maintained
+among the NQNFS clients by disabling client caching whenever a server file
+operation would cause inconsistencies.
+The possibility of inconsistencies occurs whenever a client has
+a write caching lease and any other client,
+or local operations on the server,
+tries to access the file or when
+a modify operation is attempted on a file being read cached by client(s).
+At this time, the server sends an \fBeviction notice\fR to all clients holding
+the lease and then waits for lease termination.
+Lease termination occurs when a \fBvacated the premises\fR message has been
+received from all the clients that have signed the lease or when the lease
+expires via timeout.
+The message pair \fBeviction notice\fR and \fBvacated the premises\fR roughly
+correspond to a Sprite server\(->client callback, but are not implemented as an
+actual RPC, to avoid the server waiting indefinitely for a reply from a dead
+client.
+.pp
+Server consistency checking can be viewed as issuing intrinsic leases for a
+file operation for the duration of the operation only. For example, the
+\fBCreate RPC\fR will get an intrinsic write lease on the directory in which
+the file is being created, disabling client read caches for that directory.
+.pp
+By relegating this responsibility to the server, consistency between the
+server and NQNFS clients is maintained when NFS clients are modifying the
+file system as well.\**
+.(f
+\** The NFS clients will continue to be \fIapproximately\fR consistent with
+the server.
+.)f
+.pp
+The leases are issued as time intervals to avoid the requirement of time of day
+clock synchronization. There are three important time constants known to
+the server. The \fBmaximum_lease_term\fR sets an upper bound on lease duration.
+The \fBclock_skew\fR is added to all lease terms on the server to correct for
+differing clock speeds between the client and server and \fBwrite_slack\fR is
+the number of seconds the server is willing to wait for a client with
+an expired write caching lease to push dirty writes.
+.pp
+The server maintains a \fBmodify_revision\fR number for each file. It is
+defined as an unsigned quadword integer that is never zero and that must
+increase whenever the corresponding file is modified on the server.
+It is used
+by the client to determine whether or not cached data for the file is
+stale.
+Generating this value is easier said than done. The current implementation
+uses the following technique, which is believed to be adequate.
+The high order longword is stored in the ufs inode and is initialized to one
+when an inode is first allocated.
+The low order longword is stored in main memory only and is initialized to
+zero when an inode is read in from disk.
+When the file is modified for the first time within a given second of
+wall clock time, the high order longword is incremented by one and
+the low order longword reset to zero.
+For subsequent modifications within the same second of wall clock
+time, the low order longword is incremented. If the low order longword wraps
+around to zero, the high order longword is incremented again.
+Since the high order longword only increments once per second and the inode
+is pushed to disk frequently during file modification, this implies
+0 \(<= Current\(miDisk \(<= 5.
+When the inode is read in from disk, 10
+is added to the high order longword, which ensures that the quadword
+is greater than any value it could have had before a crash.
+This introduces apparent modifications every time the inode falls out of
+the LRU inode cache, but this should only reduce the client caching performance
+by a (hopefully) small margin.
+.sh 2 "Crash Recovery and other Failure Scenarios"
+.pp
+The server must maintain the state of all the current leases held by clients.
+The nice thing about short term leases is that maximum_lease_term seconds
+after the server stops issuing leases, there are no current leases left.
+As such, server crash recovery does not require any state recovery. After
+rebooting, the server refuses to service any RPCs except for writes until
+write_slack seconds after the last lease would have expired\**.
+.(f
+\** The last lease expiry time may be safely estimated as
+"boottime+maximum_lease_term+clock_skew" for machines that cannot store
+it in nonvolatile RAM.
+.)f
+By then, the server would not have any outstanding leases to recover the
+state of and the clients have had at least write_slack seconds to push dirty
+writes to the server and get the server sync'd up to date. After this, the
+server simply services requests in a manner similar to NFS.
+In an effort to minimize the effect of "recovery storms" [Baker91],
+the server replies \fBtry_again_later\fR to the RPCs it is not
+yet ready to service.
+.pp
+After a client crashes, the server may have to wait for a lease to timeout
+before servicing a request if write sharing of a file with a cachable lease
+on the client is about to occur.
+As for the client, it simply starts up getting any leases it now needs. Any
+outstanding leases for that client on the server prior to the crash will either be renewed or expire
+via timeout.
+.pp
+Certain network partitioning failures are more problematic. If a client to
+server network connection is severed just before a write caching lease expires,
+the client cannot push the dirty writes to the server. After the lease expires
+on the server, the server permits other clients to access the file with the
+potential of getting stale data. Unfortunately I believe this failure scenario
+is intrinsic in any delayed write caching scheme unless the server is required to
+wait \fBforever\fR for a client to regain contact\**.
+.(f
+\** Gray and Cheriton avoid this problem by using a \fBwrite through\fR policy.
+.)f
+Since the write caching lease has expired on the client,
+it will sync up with the
+server as soon as the network connection has been re-established.
+.pp
+There is another failure condition that can occur when the server is congested.
+The worst case scenario would have the client pushing dirty writes to the server
+but a large request queue on the server delays these writes for more than
+\fBwrite_slack\fR seconds. It is hoped that a congestion control scheme using
+the \fBtry_again_later\fR RPC reply after booting combined with
+the following lease termination rule for write caching leases
+can minimize the risk of this occurrence.
+A write caching lease is only terminated on the server when there have
+been no writes to the file and the server has not been overloaded during
+the previous write_slack seconds. Whether the server has been overloaded
+is approximated by a test for sleeping nfsd(s) at the end of the write_slack
+period.
+.sh 2 "Server Disk Full"
+.pp
+There is a serious unresolved problem for delayed write caching with respect to
+server disk space allocation.
+When the disk on the file server is full, delayed write RPCs can fail
+due to "out of space".
+For NFS, this occurrence results in an error return from the close system
+call on the file, since the dirty blocks are pushed on close.
+Processes writing important files can check for this error return
+to ensure that the file was written successfully.
+For NQNFS, the dirty blocks are not pushed on close and as such the client
+may not attempt the write RPC until after the process has done the close
+which implies no error return from the close.
+For the current prototype,
+the only solution is to modify programs writing important
+file(s) to call fsync and check for an error return from it instead of close.
+.sh 2 "Protocol Details"
+.pp
+The protocol specification is identical to that of NFS [Sun89] except for
+the following changes.
+.ip \(bu
+RPC Information
+.(l
+ Program Number 300105
+ Version Number 1
+.)l
+.ip \(bu
+Readdir_and_Lookup RPC
+.(l
+ struct readdirlookargs {
+ fhandle file;
+ nfscookie cookie;
+ unsigned count;
+ unsigned duration;
+ };
+
+ struct entry {
+ unsigned cachable;
+ unsigned duration;
+ modifyrev rev;
+ fhandle entry_fh;
+ nqnfs_fattr entry_attrib;
+ unsigned fileid;
+ filename name;
+ nfscookie cookie;
+ entry *nextentry;
+ };
+
+ union readdirlookres switch (stat status) {
+ case NFS_OK:
+ struct {
+ entry *entries;
+ bool eof;
+ } readdirlookok;
+ default:
+ void;
+ };
+
+ readdirlookres
+ NQNFSPROC_READDIRLOOK(readdirlookargs) = 18;
+.)l
+Reads entries in a directory in a manner analogous to the NFSPROC_READDIR RPC
+in NFS, but returns the file handle and attributes of each entry as well.
+This allows the attribute and lookup caches to be primed.
+.ip \(bu
+Get Lease RPC
+.(l
+ struct getleaseargs {
+ fhandle file;
+ cachetype readwrite;
+ unsigned duration;
+ };
+
+ union getleaseres switch (stat status) {
+ case NFS_OK:
+ bool cachable;
+ unsigned duration;
+ modifyrev rev;
+ nqnfs_fattr attributes;
+ default:
+ void;
+ };
+
+ getleaseres
+ NQNFSPROC_GETLEASE(getleaseargs) = 19;
+.)l
+Gets a lease for "file" valid for "duration" seconds from when the lease
+was issued on the server\**.
+.(f
+\** To be safe, the client may only assume that the lease is valid
+for ``duration'' seconds from when the RPC request was sent to the server.
+.)f
+The lease permits client caching if "cachable" is true.
+The modify revision level and attributes for the file are also returned.
+.ip \(bu
+Eviction Message
+.(l
+ void
+ NQNFSPROC_EVICTED (fhandle) = 21;
+.)l
+This message is sent from the server to the client. When the client receives
+the message, it should flush data associated with the file represented by
+"fhandle" from its caches and then send the \fBVacated Message\fR back to
+the server. Flushing includes pushing any dirty writes via write RPCs.
+.ip \(bu
+Vacated Message
+.(l
+ void
+ NQNFSPROC_VACATED (fhandle) = 20;
+.)l
+This message is sent from the client to the server in response to the
+\fBEviction Message\fR. See above.
+.ip \(bu
+Access RPC
+.(l
+ struct accessargs {
+ fhandle file;
+ bool read_access;
+ bool write_access;
+ bool exec_access;
+ };
+
+ stat
+ NQNFSPROC_ACCESS(accessargs) = 22;
+.)l
+The access RPC does permission checking on the server for the given type
+of access required by the client for the file.
+Use of this RPC avoids accessibility problems caused by client->server uid
+mapping.
+.ip \(bu
+Piggybacked Get Lease Request
+.pp
+The piggybacked get lease request is functionally equivalent to the Get Lease
+RPC except that it is attached to one of the other NQNFS RPC requests as follows.
+A getleaserequest is prepended to all of the request arguments for NQNFS
+and a getleaserequestres is inserted in all NFS result structures just after
+the "stat" field only if "stat == NFS_OK".
+.(l
+ union getleaserequest switch (cachetype type) {
+ case NQLREAD:
+ case NQLWRITE:
+ unsigned duration;
+ default:
+ void;
+ };
+
+ union getleaserequestres switch (cachetype type) {
+ case NQLREAD:
+ case NQLWRITE:
+ bool cachable;
+ unsigned duration;
+ modifyrev rev;
+ default:
+ void;
+ };
+.)l
+The get lease request applies to the file that the attached RPC operates on
+and the file attributes remain in the same location as for the NFS RPC reply
+structure.
+.ip \(bu
+Three additional "stat" values
+.pp
+Three additional values have been added to the enumerated type "stat".
+.(l
+ NQNFS_EXPIRED=500
+ NQNFS_TRYLATER=501
+ NQNFS_AUTHERR=502
+.)l
+The "expired" value indicates that a lease has expired.
+The "try later"
+value is returned by the server when it wishes the client to retry the
+RPC request after a short delay. It is used during crash recovery (Section 2)
+and may also be useful for server congestion control.
+The "authentication error" value is returned for kerberized mount points to
+indicate that there is no cached authentication mapping and a Kerberos ticket
+for the principal is required.
+.sh 2 "Data Types"
+.ip \(bu
+cachetype
+.(l
+ enum cachetype {
+ NQLNONE = 0,
+ NQLREAD = 1,
+ NQLWRITE = 2
+ };
+.)l
+Type of lease requested. NQLNONE is used to indicate no piggybacked lease
+request.
+.ip \(bu
+modifyrev
+.(l
+ typedef unsigned hyper modifyrev;
+.)l
+The "modifyrev" is an unsigned quadword integer value that is never zero
+and increases every time the corresponding file is modified on the server.
+.ip \(bu
+nqnfs_time
+.(l
+ struct nqnfs_time {
+ unsigned seconds;
+ unsigned nano_seconds;
+ };
+.)l
+For NQNFS, times are handled at nanosecond resolution instead of the microsecond
+resolution used for NFS.
+.ip \(bu
+nqnfs_fattr
+.(l
+ struct nqnfs_fattr {
+ ftype type;
+ unsigned mode;
+ unsigned nlink;
+ unsigned uid;
+ unsigned gid;
+ unsigned hyper size;
+ unsigned blocksize;
+ unsigned rdev;
+ unsigned hyper bytes;
+ unsigned fsid;
+ unsigned fileid;
+ nqnfs_time atime;
+ nqnfs_time mtime;
+ nqnfs_time ctime;
+ unsigned flags;
+ unsigned generation;
+ modifyrev rev;
+ };
+.)l
+The nqnfs_fattr structure is modified from the NFS fattr so that it stores
+the file size as a 64bit quantity and the storage occupied as a 64bit number
+of bytes. It also has fields added for the 4.4BSD va_flags and va_gen fields
+as well as the file's modify rev level.
+.ip \(bu
+nqnfs_sattr
+.(l
+ struct nqnfs_sattr {
+ unsigned mode;
+ unsigned uid;
+ unsigned gid;
+ unsigned hyper size;
+ nqnfs_time atime;
+ nqnfs_time mtime;
+ unsigned flags;
+ unsigned rdev;
+ };
+.)l
+The nqnfs_sattr structure is modified from the NFS sattr structure in the
+same manner as fattr.
+.lp
+The arguments to several of the NFS RPCs have been modified as well. Mostly,
+these are minor changes to use 64bit file offsets or similar. The modified
+argument structures follow.
+.ip \(bu
+Lookup RPC
+.(l
+ struct lookup_diropargs {
+ unsigned duration;
+ fhandle dir;
+ filename name;
+ };
+
+ union lookup_diropres switch (stat status) {
+ case NFS_OK:
+ struct {
+ union getleaserequestres lookup_lease;
+ fhandle file;
+ nqnfs_fattr attributes;
+ } lookup_diropok;
+ default:
+ void;
+ };
+
+.)l
+The additional "duration" argument tells the server to get a lease for the
+name being looked up if it is non-zero and the lease is specified
+in "lookup_lease".
+.ip \(bu
+Read RPC
+.(l
+ struct nqnfs_readargs {
+ fhandle file;
+ unsigned hyper offset;
+ unsigned count;
+ };
+.)l
+.ip \(bu
+Write RPC
+.(l
+ struct nqnfs_writeargs {
+ fhandle file;
+ unsigned hyper offset;
+ bool append;
+ nfsdata data;
+ };
+.)l
+The "append" argument is true for append only write operations.
+.ip \(bu
+Get Filesystem Attributes RPC
+.(l
+ union nqnfs_statfsres switch (stat status) {
+ case NFS_OK:
+ struct {
+ unsigned tsize;
+ unsigned bsize;
+ unsigned blocks;
+ unsigned bfree;
+ unsigned bavail;
+ unsigned files;
+ unsigned files_free;
+ } info;
+ default:
+ void;
+ };
+.)l
+The "files" field is the number of files in the file system and the "files_free"
+field is the number of additional files that can be created.
+.sh 1 "Summary"
+.pp
+The configuration and tuning of an NFS environment tends to be a bit of a
+mystic art, but hopefully this paper along with the man pages and other
+reading will be helpful. Good Luck.
diff --git a/share/doc/smm/06.nfs/Makefile b/share/doc/smm/06.nfs/Makefile
new file mode 100644
index 0000000..e36a0a6
--- /dev/null
+++ b/share/doc/smm/06.nfs/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= smm/06.nfs
+SRCS= 0.t 1.t 2.t ref.t
+MACROS= -me
+
+.include <bsd.doc.mk>
diff --git a/share/doc/smm/06.nfs/ref.t b/share/doc/smm/06.nfs/ref.t
new file mode 100644
index 0000000..039363b
--- /dev/null
+++ b/share/doc/smm/06.nfs/ref.t
@@ -0,0 +1,123 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This document is derived from software contributed to Berkeley by
+.\" Rick Macklem at The University of Guelph.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)ref.t 8.1 (Berkeley) 6/8/93
+.\"
+.sh 1 "Bibliography"
+.ip [Baker91] 16
+Mary Baker and John Ousterhout, Availability in the Sprite Distributed
+File System, In \fIOperating Systems Review\fR, (25)2, pg. 95-98,
+April 1991.
+.ip [Baker91a] 16
+Mary Baker, Private Email Communication, May 1991.
+.ip [Burrows88] 16
+Michael Burrows, Efficient Data Sharing, Technical Report #153,
+Computer Laboratory, University of Cambridge, Dec. 1988.
+.ip [Gray89] 16
+Cary G. Gray and David R. Cheriton, Leases: An Efficient Fault-Tolerant
+Mechanism for Distributed File Cache Consistency, In \fIProc. of the
+Twelfth ACM Symposium on Operating Systems Principles\fR, Litchfield Park,
+AZ, Dec. 1989.
+.ip [Howard88] 16
+John H. Howard, Michael L. Kazar, Sherri G. Menees, David A. Nichols,
+M. Satyanarayanan, Robert N. Sidebotham and Michael J. West,
+Scale and Performance in a Distributed File System, \fIACM Trans. on
+Computer Systems\fR, (6)1, pg. 51-81, Feb. 1988.
+.ip [Juszczak89] 16
+Chet Juszczak, Improving the Performance and Correctness of an NFS Server,
+In \fIProc. Winter 1989 USENIX Conference,\fR pg. 53-63, San Diego, CA, January 1989.
+.ip [Keith90] 16
+Bruce E. Keith, Perspectives on NFS File Server Performance Characterization,
+In \fIProc. Summer 1990 USENIX Conference\fR, pg. 267-277, Anaheim, CA,
+June 1990.
+.ip [Kent87] 16
+Christopher A. Kent, \fICache Coherence in Distributed Systems\fR,
+Research Report 87/4,
+Digital Equipment Corporation Western Research Laboratory, April 1987.
+.ip [Kent87a] 16
+Christopher A. Kent and Jeffrey C. Mogul,
+\fIFragmentation Considered Harmful\fR, Research Report 87/3,
+Digital Equipment Corporation Western Research Laboratory, Dec. 1987.
+.ip [Macklem91] 16
+Rick Macklem, Lessons Learned Tuning the 4.3BSD Reno Implementation of the
+NFS Protocol, In \fIProc. Winter USENIX Conference\fR, pg. 53-64,
+Dallas, TX, January 1991.
+.ip [Nelson88] 16
+Michael N. Nelson, Brent B. Welch, and John K. Ousterhout, Caching in the
+Sprite Network File System, \fIACM Transactions on Computer Systems\fR, (6)1,
+pg. 134-154, February 1988.
+.ip [Nowicki89] 16
+Bill Nowicki, Transport Issues in the Network File System, In
+\fIComputer Communication Review\fR, pg. 16-20, Vol. 19, Number 2, April 1989.
+.ip [Ousterhout90] 16
+John K. Ousterhout, Why Aren't Operating Systems Getting Faster As Fast as
+Hardware? In \fIProc. Summer 1990 USENIX Conference\fR, pg. 247-256, Anaheim,
+CA, June 1990.
+.ip [Pendry93] 16
+Jan-Simon Pendry, 4.4 BSD Automounter Reference Manual, In
+\fIsrc/usr.sbin/amd/doc directory of 4.4 BSD distribution tape\fR.
+.ip [Reid90] 16
+Jim Reid, N(e)FS: the Protocol is the Problem, In
+\fIProc. Summer 1990 UKUUG Conference\fR,
+London, England, July 1990.
+.ip [Sandberg85] 16
+Russel Sandberg, David Goldberg, Steve Kleiman, Dan Walsh, and Bob Lyon,
+Design and Implementation of the Sun Network Filesystem, In \fIProc. Summer
+1985 USENIX Conference\fR, pages 119-130, Portland, OR, June 1985.
+.ip [Schroeder85] 16
+Michael D. Schroeder, David K. Gifford and Roger M. Needham, A Caching
+File System For A Programmer's Workstation, In \fIProc. of the Tenth
+ACM Symposium on Operating Systems Principles\fR, pg. 25-34, Orcas Island,
+WA, Dec. 1985.
+.ip [Srinivasan89] 16
+V. Srinivasan and Jeffrey C. Mogul, \fISpritely NFS: Implementation and
+Performance of Cache-Consistency Protocols\fR, Research Report 89/5,
+Digital Equipment Corporation Western Research Laboratory, May 1989.
+.ip [Steiner88] 16
+Jennifer G. Steiner, Clifford Neuman and Jeffrey I. Schiller,
+Kerberos: An Authentication Service for Open Network Systems, In
+\fIProc. Winter 1988 USENIX Conference\fR, Dallas, TX, February 1988.
+.ip [Stern] 16
+Hal Stern, \fIManaging NFS and NIS\fR, O'Reilly and Associates,
+ISBN 0-937175-75-7.
+.ip [Sun87] 16
+Sun Microsystems Inc., \fIXDR: External Data Representation Standard\fR,
+RFC1014, Network Information Center, SRI International, June 1987.
+.ip [Sun88] 16
+Sun Microsystems Inc., \fIRPC: Remote Procedure Call Protocol Specification Version 2\fR,
+RFC1057, Network Information Center, SRI International, June 1988.
+.ip [Sun89] 16
+Sun Microsystems Inc., \fINFS: Network File System Protocol Specification\fR,
+ARPANET Working Group Requests for Comment, DDN Network Information Center,
+SRI International, Menlo Park, CA, March 1989, RFC-1094.
diff --git a/share/doc/smm/18.net/0.t b/share/doc/smm/18.net/0.t
new file mode 100644
index 0000000..d16e56f
--- /dev/null
+++ b/share/doc/smm/18.net/0.t
@@ -0,0 +1,184 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)0.t 8.1 (Berkeley) 6/10/93
+.\"
+.de IR
+\fI\\$1\fP\\$2
+..
+.if n .ND
+.TL
+Networking Implementation Notes
+.br
+4.4BSD Edition
+.AU
+Samuel J. Leffler, William N. Joy, Robert S. Fabry, and Michael J. Karels
+.AI
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California, Berkeley
+Berkeley, CA 94720
+.AB
+.FS
+* UNIX is a trademark of Bell Laboratories.
+.FE
+This report describes the internal structure of the
+networking facilities developed for the 4.4BSD version
+of the UNIX* operating system
+for the VAX\(dg. These facilities
+.FS
+\(dg DEC, VAX, DECnet, and UNIBUS are trademarks of
+Digital Equipment Corporation.
+.FE
+are based on several central abstractions which
+structure the external (user) view of network communication
+as well as the internal (system) implementation.
+.PP
+The report documents the internal structure of the networking system.
+The ``Berkeley Software Architecture Manual, 4.4BSD Edition'' (PSD:5)
+provides a description of the user interface to the networking facilities.
+.sp
+.LP
+Revised June 10, 1993
+.AE
+.LP
+.\".de PT
+.\".lt \\n(LLu
+.\".pc %
+.\".nr PN \\n%
+.\".tl '\\*(LH'\\*(CH'\\*(RH'
+.\".lt \\n(.lu
+.\"..
+.\".ds RH Contents
+.OH 'Networking Implementation Notes''SMM:18-%'
+.EH 'SMM:18-%''Networking Implementation Notes'
+.bp
+.ce
+.B "TABLE OF CONTENTS"
+.LP
+.sp 1
+.nf
+.B "1. Introduction"
+.LP
+.sp .5v
+.nf
+.B "2. Overview"
+.LP
+.sp .5v
+.nf
+.B "3. Goals"
+.LP
+.sp .5v
+.nf
+.B "4. Internal address representation"
+.LP
+.sp .5v
+.nf
+.B "5. Memory management"
+.LP
+.sp .5v
+.nf
+.B "6. Internal layering"
+6.1. Socket layer
+6.1.1. Socket state
+6.1.2. Socket data queues
+6.1.3. Socket connection queuing
+6.2. Protocol layer(s)
+6.3. Network-interface layer
+6.3.1. UNIBUS interfaces
+.LP
+.sp .5v
+.nf
+.B "7. Socket/protocol interface"
+.LP
+.sp .5v
+.nf
+.B "8. Protocol/protocol interface"
+8.1. pr_output
+8.2. pr_input
+8.3. pr_ctlinput
+8.4. pr_ctloutput
+.LP
+.sp .5v
+.nf
+.B "9. Protocol/network-interface interface"
+9.1. Packet transmission
+9.2. Packet reception
+.LP
+.sp .5v
+.nf
+.B "10. Gateways and routing issues"
+10.1. Routing tables
+10.2. Routing table interface
+10.3. User level routing policies
+.LP
+.sp .5v
+.nf
+.B "11. Raw sockets"
+11.1. Control blocks
+11.2. Input processing
+11.3. Output processing
+.LP
+.sp .5v
+.nf
+.B "12. Buffering and congestion control"
+12.1. Memory management
+12.2. Protocol buffering policies
+12.3. Queue limiting
+12.4. Packet forwarding
+.LP
+.sp .5v
+.nf
+.B "13. Out of band data"
+.LP
+.sp .5v
+.nf
+.B "14. Trailer protocols"
+.LP
+.sp .5v
+.nf
+.B Acknowledgements
+.LP
+.sp .5v
+.nf
+.B References
+.bp
+.de _d
+.if t .ta .6i 2.1i 2.6i
+.\" 2.94 went to 2.6, 3.64 to 3.30
+.if n .ta .84i 2.6i 3.30i
+..
+.de _f
+.if t .ta .5i 1.25i 2.5i
+.\" 3.5i went to 3.8i
+.if n .ta .7i 1.75i 3.8i
+..
diff --git a/share/doc/smm/18.net/1.t b/share/doc/smm/18.net/1.t
new file mode 100644
index 0000000..ba5adb5
--- /dev/null
+++ b/share/doc/smm/18.net/1.t
@@ -0,0 +1,66 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)1.t 8.1 (Berkeley) 6/8/93
+.\"
+.\".ds RH Introduction
+.br
+.ne 2i
+.NH
+\s+2Introduction\s0
+.PP
+This report describes the internal structure of
+facilities added to the
+4.2BSD version of the UNIX operating system for
+the VAX,
+as modified in the 4.4BSD release.
+The system facilities provide
+a uniform user interface to networking
+within UNIX. In addition, the implementation
+introduces a structure for network communications which may be
+used by system implementors in adding new networking
+facilities. The internal structure is not visible
+to the user; rather, it is intended to aid implementors
+of communication protocols and network services by
+providing a framework which
+promotes code sharing and minimizes implementation effort.
+.PP
+The reader is expected to be familiar with the C programming
+language and system interface, as described in the
+\fIBerkeley Software Architecture Manual, 4.4BSD Edition\fP [Joy86].
+Basic understanding of network
+communication concepts is assumed; where required
+any additional ideas are introduced.
+.PP
+The remainder of this document
+provides a description of the system internals,
+avoiding, when possible, those portions which are utilized only
+by the interprocess communication facilities.
diff --git a/share/doc/smm/18.net/2.t b/share/doc/smm/18.net/2.t
new file mode 100644
index 0000000..f504889
--- /dev/null
+++ b/share/doc/smm/18.net/2.t
@@ -0,0 +1,85 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)2.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH Overview
+.br
+.ne 2i
+.NH
+\s+2Overview\s0
+.PP
+If we consider
+the International Standards Organization's (ISO)
+Open System Interconnection (OSI) model of
+network communication [ISO81] [Zimmermann80],
+the networking facilities
+described here correspond to a portion of the
+session layer (layer 5) and all of the transport and
+network layers (layers 4 and 3, respectively).
+.PP
+The network layer provides possibly imperfect
+data transport services with minimal addressing
+structure.
+Addressing at this level is normally host to host,
+with implicit or explicit routing optionally supported
+by the communicating agents.
+.PP
+At the transport
+layer the notions of reliable transfer, data sequencing,
+flow control, and service addressing are normally
+included. Reliability is usually managed by
+explicit acknowledgement of data delivered. Failure
+to acknowledge a transfer results in retransmission of
+the data. Sequencing may be handled by tagging
+each message handed to the network layer with a
+\fIsequence number\fP and maintaining
+state at the endpoints of communication to utilize
+received sequence numbers in reordering data which
+arrives out of order.
+.PP
+The session layer facilities may provide forms of
+addressing which are mapped into formats required
+by the transport layer, service authentication
+and client authentication, etc. Various systems
+also provide services such as data encryption and
+address and protocol translation.
+.PP
+The following sections begin by describing some of the common
+data structures and utility routines, then examine
+the internal layering. The contents of each layer
+and its interface are considered. Certain of the
+interfaces are protocol implementation specific. For
+these cases examples have been drawn from the Internet [Cerf78]
+protocol family. Later sections cover routing issues,
+the design of the raw socket interface and other
+miscellaneous topics.
diff --git a/share/doc/smm/18.net/3.t b/share/doc/smm/18.net/3.t
new file mode 100644
index 0000000..1d1fddd
--- /dev/null
+++ b/share/doc/smm/18.net/3.t
@@ -0,0 +1,59 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)3.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH Goals
+.br
+.ne 2i
+.NH
+\s+2Goals\s0
+.PP
+The networking system was designed with the goal of supporting
+multiple \fIprotocol families\fP and addressing styles. This required
+information to be ``hidden'' in common data structures which
+could be manipulated by all the pieces of the system, but which
+required interpretation only by the protocols which ``controlled''
+it. The system described here attempts to minimize
+the use of shared data structures to those kept by a suite of
+protocols (a \fIprotocol family\fP), and those used for rendezvous
+between ``synchronous'' and ``asynchronous'' portions of the
+system (e.g. queues of data packets are filled at interrupt
+time and emptied based on user requests).
+.PP
+A major goal of the system was to provide a framework within
+which new protocols and hardware could easily be supported.
+To this end, a great deal of effort has been expended to
+create utility routines which hide many of the more
+complex and/or hardware dependent chores of networking.
+Later sections describe the utility routines and the underlying
+data structures they manipulate.
diff --git a/share/doc/smm/18.net/4.t b/share/doc/smm/18.net/4.t
new file mode 100644
index 0000000..afa6913
--- /dev/null
+++ b/share/doc/smm/18.net/4.t
@@ -0,0 +1,67 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)4.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Address representation
+.br
+.ne 2i
+.NH
+\s+2Internal address representation\s0
+.PP
+Common to all portions of the system are two data structures.
+These structures are used to represent
+addresses and various data objects.
+Addresses are described internally by the \fIsockaddr\fP structure,
+.DS
+._f
+struct sockaddr {
+ short sa_family; /* data format identifier */
+ char sa_data[14]; /* address */
+};
+.DE
+All addresses belong to one or more \fIaddress families\fP
+which define their format and interpretation.
+The \fIsa_family\fP field indicates the address family to which the address
+belongs, and the \fIsa_data\fP field contains the actual data value.
+The size of the data field, 14 bytes, was selected based on a study
+of current address formats.*
+Specific address formats use private structure definitions
+that define the format of the data field.
+The system interface supports larger address structures,
+although address-family-independent support facilities, for example routing
+and raw socket interfaces, provide only 14 bytes for address storage.
+Protocols that do not use those facilities (e.g., the current Unix domain)
+may use larger data areas.
+.FS
+* Later versions of the system may support variable length addresses.
+.FE
diff --git a/share/doc/smm/18.net/5.t b/share/doc/smm/18.net/5.t
new file mode 100644
index 0000000..d4fb8e3
--- /dev/null
+++ b/share/doc/smm/18.net/5.t
@@ -0,0 +1,184 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)5.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Memory management
+.br
+.ne 2i
+.NH
+\s+2Memory management\s0
+.PP
+A single mechanism is used for data storage: memory buffers, or
+\fImbuf\fP's. An mbuf is a structure of the form:
+.DS
+._f
+struct mbuf {
+ struct mbuf *m_next; /* next buffer in chain */
+ u_long m_off; /* offset of data */
+ short m_len; /* amount of data in this mbuf */
+ short m_type; /* mbuf type (accounting) */
+ u_char m_dat[MLEN]; /* data storage */
+ struct mbuf *m_act; /* link in higher-level mbuf list */
+};
+.DE
+The \fIm_next\fP field is used to chain mbufs together on linked
+lists, while the \fIm_act\fP field allows lists of mbuf chains to be
+accumulated. By convention, the mbufs common to a single object
+(for example, a packet) are chained together with the \fIm_next\fP
+field, while groups of objects are linked via the \fIm_act\fP
+field (possibly when in a queue).
+.PP
+Each mbuf has a small data area for storing information, \fIm_dat\fP.
+The \fIm_len\fP field indicates the amount of data, while the \fIm_off\fP
+field is an offset to the beginning of the data from the base of the
+mbuf. Thus, for example, the macro \fImtod\fP, which converts a pointer
+to an mbuf to a pointer to the data stored in the mbuf, has the form
+.DS
+._d
+#define mtod(\fIx\fP,\fIt\fP) ((\fIt\fP)((int)(\fIx\fP) + (\fIx\fP)->m_off))
+.DE
+(note the \fIt\fP parameter, a C type cast, which is used to cast
+the resultant pointer for proper assignment).
+.PP
+In addition to storing data directly in the mbuf's data area, data
+of page size may also be stored in a separate area of memory.
+The mbuf utility routines maintain
+a pool of pages for this purpose and manipulate a private page map
+for such pages.
+An mbuf with an external data area may be recognized by the larger
+offset to the data area;
+this is formalized by the macro M_HASCL(\fIm\fP), which is true
+if the mbuf whose address is \fIm\fP has an external page cluster.
+An array of reference counts on pages is also maintained
+so that copies of pages may be made without core to core
+copying (copies are created simply by duplicating the reference to the data
+and incrementing the associated reference counts for the pages).
+Separate data pages are currently used only
+when copying data from a user process into the kernel,
+and when bringing data in at the hardware level. Routines which
+manipulate mbufs are not normally aware whether data is stored directly in
+the mbuf data array, or if it is kept in separate pages.
+.PP
+The following may be used to allocate and free mbufs:
+.LP
+m = m_get(wait, type);
+.br
+MGET(m, wait, type);
+.IP
+The subroutine \fIm_get\fP and the macro \fIMGET\fP
+each allocate an mbuf, placing its address in \fIm\fP.
+The argument \fIwait\fP is either M_WAIT or M_DONTWAIT according
+to whether allocation should block or fail if no mbuf is available.
+The \fItype\fP is one of the predefined mbuf types for use in accounting
+of mbuf allocation.
+.IP "MCLGET(m);"
+This macro attempts to allocate an mbuf page cluster
+to associate with the mbuf \fIm\fP.
+If successful, the length of the mbuf is set to CLSIZE,
+the size of the page cluster.
+.LP
+n = m_free(m);
+.br
+MFREE(m,n);
+.IP
+The routine \fIm_free\fP and the macro \fIMFREE\fP
+each free a single mbuf, \fIm\fP, and any associated external storage area,
+placing a pointer to its successor in the chain it heads, if any, in \fIn\fP.
+.IP "m_freem(m);"
+This routine frees an mbuf chain headed by \fIm\fP.
+.PP
+The following utility routines are available for manipulating mbuf
+chains:
+.IP "m = m_copy(m0, off, len);"
+.br
+The \fIm_copy\fP routine creates a copy of all, or part, of a
+list of the mbufs in \fIm0\fP. \fILen\fP bytes of data, starting
+\fIoff\fP bytes from the front of the chain, are copied.
+Where possible, reference counts on pages are used instead
+of core to core copies. The original mbuf chain must have at
+least \fIoff\fP + \fIlen\fP bytes of data. If \fIlen\fP is
+specified as M_COPYALL, all the data present, offset
+as before, is copied.
+.IP "m_cat(m, n);"
+.br
+The mbuf chain, \fIn\fP, is appended to the end of \fIm\fP.
+Where possible, compaction is performed.
+.IP "m_adj(m, diff);"
+.br
+The mbuf chain, \fIm\fP is adjusted in size by \fIdiff\fP
+bytes. If \fIdiff\fP is non-negative, \fIdiff\fP bytes
+are shaved off the front of the mbuf chain. If \fIdiff\fP
+is negative, the alteration is performed from back to front.
+No space is reclaimed in this operation; alterations are
+accomplished by changing the \fIm_len\fP and \fIm_off\fP
+fields of mbufs.
+.IP "m = m_pullup(m0, size);"
+.br
+After a successful call to \fIm_pullup\fP, the mbuf at
+the head of the returned list, \fIm\fP, is guaranteed
+to have at least \fIsize\fP
+bytes of data in contiguous memory within the data area of the mbuf
+(allowing access via a pointer, obtained using the \fImtod\fP macro,
+and allowing the mbuf to be located from a pointer to the data area
+using \fIdtom\fP, defined below).
+If the original data was less than \fIsize\fP bytes long,
+\fIsize\fP was greater than the size of an mbuf data
+area (112 bytes), or required resources were unavailable,
+\fIm\fP is 0 and the original mbuf chain is deallocated.
+.IP
+This routine is particularly useful when verifying packet
+header lengths on reception. For example, if a packet is
+received and only 8 of the necessary 16 bytes required
+for a valid packet header are present at the head of the list
+of mbufs representing the packet, the remaining 8 bytes
+may be ``pulled up'' with a single \fIm_pullup\fP call.
+If the call fails the invalid packet will have been discarded.
+.PP
+By ensuring that mbufs always reside on 128 byte boundaries,
+it is always possible to locate the mbuf associated with a data
+area by masking off the low bits of the virtual address.
+This allows modules to store data structures in mbufs and
+pass them around without concern for locating the original
+mbuf when it comes time to free the structure.
+Note that this works only with objects stored in the internal data
+buffer of the mbuf.
+The \fIdtom\fP macro is used to convert a pointer into an mbuf's
+data area to a pointer to the mbuf,
+.DS
+#define dtom(x) ((struct mbuf *)((int)x & ~(MSIZE-1)))
+.DE
+.PP
+Mbufs are used for dynamically allocated data structures such as
+sockets as well as memory allocated for packets and headers. Statistics are
+maintained on mbuf usage and can be viewed by users using the
+\fInetstat\fP\|(1) program.
diff --git a/share/doc/smm/18.net/6.t b/share/doc/smm/18.net/6.t
new file mode 100644
index 0000000..601988c
--- /dev/null
+++ b/share/doc/smm/18.net/6.t
@@ -0,0 +1,664 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)6.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Internal layering
+.br
+.ne 2i
+.NH
+\s+2Internal layering\s0
+.PP
+The internal structure of the network system is divided into
+three layers. These
+layers correspond to the services provided by the socket
+abstraction, those provided by the communication protocols,
+and those provided by the hardware interfaces. The communication
+protocols are normally layered into two or more individual
+cooperating layers, though they are collectively viewed
+in the system as one layer providing services supportive
+of the appropriate socket abstraction.
+.PP
+The following sections describe the properties of each layer
+in the system and the interfaces to which each must conform.
+.NH 2
+Socket layer
+.PP
+The socket layer deals with the interprocess communication
+facilities provided by the system. A socket is a bidirectional
+endpoint of communication which is ``typed'' by the semantics
+of communication it supports. The system calls described in
+the \fIBerkeley Software Architecture Manual\fP [Joy86]
+are used to manipulate sockets.
+.PP
+A socket consists of the following data structure:
+.DS
+._f
+struct socket {
+ short so_type; /* generic type */
+ short so_options; /* from socket call */
+ short so_linger; /* time to linger while closing */
+ short so_state; /* internal state flags */
+ caddr_t so_pcb; /* protocol control block */
+ struct protosw *so_proto; /* protocol handle */
+ struct socket *so_head; /* back pointer to accept socket */
+ struct socket *so_q0; /* queue of partial connections */
+ short so_q0len; /* partials on so_q0 */
+ struct socket *so_q; /* queue of incoming connections */
+ short so_qlen; /* number of connections on so_q */
+ short so_qlimit; /* max number queued connections */
+ struct sockbuf so_rcv; /* receive queue */
+ struct sockbuf so_snd; /* send queue */
+ short so_timeo; /* connection timeout */
+ u_short so_error; /* error affecting connection */
+ u_short so_oobmark; /* chars to oob mark */
+ short so_pgrp; /* pgrp for signals */
+};
+.DE
+.PP
+Each socket contains two data queues, \fIso_rcv\fP and \fIso_snd\fP,
+and a pointer to routines which provide supporting services.
+The type of the socket,
+\fIso_type\fP, is defined at socket creation time and used in selecting
+those services which are appropriate to support it. The supporting
+protocol is selected at socket creation time and recorded in
+the socket data structure for later use. Protocols are defined
+by a table of procedures, the \fIprotosw\fP structure, which will
+be described in detail later. A pointer to a protocol-specific
+data structure,
+the ``protocol control block,'' is also present in the socket structure.
+Protocols control this data structure, which normally includes a
+back pointer to the parent socket structure to allow easy
+lookup when returning information to a user
+(for example, placing an error number in the \fIso_error\fP
+field). The other entries in the socket structure are used in
+queuing connection requests, validating user requests, storing
+socket characteristics (e.g.
+options supplied at the time a socket is created), and maintaining
+a socket's state.
+.PP
+Processes ``rendezvous at a socket'' in many instances. For instance,
+when a process wishes to extract data from a socket's receive queue
+and it is empty, or lacks sufficient data to satisfy the request,
+the process blocks, supplying the address of the receive queue as
+a ``wait channel'' to be used in notification. When data arrives
+for the process and is placed in the socket's queue, the blocked
+process is identified by the fact it is waiting ``on the queue.''
+.NH 3
+Socket state
+.PP
+A socket's state is defined from the following:
+.DS
+.ta \w'#define 'u +\w'SS_ISDISCONNECTING 'u +\w'0x000 'u
+#define SS_NOFDREF 0x001 /* no file table ref any more */
+#define SS_ISCONNECTED 0x002 /* socket connected to a peer */
+#define SS_ISCONNECTING 0x004 /* in process of connecting to peer */
+#define SS_ISDISCONNECTING 0x008 /* in process of disconnecting */
+#define SS_CANTSENDMORE 0x010 /* can't send more data to peer */
+#define SS_CANTRCVMORE 0x020 /* can't receive more data from peer */
+#define SS_RCVATMARK 0x040 /* at mark on input */
+
+#define SS_PRIV 0x080 /* privileged */
+#define SS_NBIO 0x100 /* non-blocking ops */
+#define SS_ASYNC 0x200 /* async i/o notify */
+.DE
+.PP
+The state of a socket is manipulated both by the protocols
+and the user (through system calls).
+When a socket is created, the state is defined based on the type of socket.
+It may change as control actions are performed, for example connection
+establishment.
+It may also change according to the type of
+input/output the user wishes to perform, as indicated by options
+set with \fIfcntl\fP. ``Non-blocking'' I/O implies that
+a process should never be blocked to await resources. Instead, any
+call which would block returns prematurely
+with the error EWOULDBLOCK, or the service request may be partially
+fulfilled, e.g. a request for more data than is present.
+.PP
+If a process requested ``asynchronous'' notification of events
+related to the socket, the SIGIO signal is posted to the process
+when such events occur.
+An event is a change in the socket's state;
+examples of such occurrences are: space
+becoming available in the send queue, new data available in the
+receive queue, connection establishment or disestablishment, etc.
+.PP
+A socket may be marked ``privileged'' if it was created by the
+super-user. Only privileged sockets may
+bind addresses in privileged portions of an address space
+or use ``raw'' sockets to access lower levels of the network.
+.NH 3
+Socket data queues
+.PP
+A socket's data queue contains a pointer to the data stored in
+the queue and other entries related to the management of
+the data. The following structure defines a data queue:
+.DS
+._f
+struct sockbuf {
+ u_short sb_cc; /* actual chars in buffer */
+ u_short sb_hiwat; /* max actual char count */
+ u_short sb_mbcnt; /* chars of mbufs used */
+ u_short sb_mbmax; /* max chars of mbufs to use */
+ u_short sb_lowat; /* low water mark */
+ short sb_timeo; /* timeout */
+ struct mbuf *sb_mb; /* the mbuf chain */
+ struct proc *sb_sel; /* process selecting read/write */
+ short sb_flags; /* flags, see below */
+};
+.DE
+.PP
+Data is stored in a queue as a chain of mbufs.
+The actual count of data characters as well as high and low water marks are
+used by the protocols in controlling the flow of data.
+The amount of buffer space (characters of mbufs and associated data pages)
+is also recorded along with the limit on buffer allocation.
+The socket routines cooperate in implementing the flow control
+policy by blocking a process when it requests to send data and
+the high water mark has been reached, or when it requests to
+receive data and less than the low water mark is present
+(assuming non-blocking I/O has not been specified).*
+.FS
+* The low-water mark is always presumed to be 0
+in the current implementation.
+.FE
+.PP
+When a socket is created, the supporting protocol ``reserves'' space
+for the send and receive queues of the socket.
+The limit on buffer allocation is set somewhat higher than the limit
+on data characters
+to account for the granularity of buffer allocation.
+The actual storage associated with a
+socket queue may fluctuate during a socket's lifetime, but it is assumed
+that this reservation will always allow a protocol to acquire enough memory
+to satisfy the high water marks.
+.PP
+The timeout and select values are manipulated by the socket routines
+in implementing various portions of the interprocess communications
+facilities and will not be described here.
+.PP
+Data queued at a socket is stored in one of two styles.
+Stream-oriented sockets queue data with no addresses, headers
+or record boundaries.
+The data are in mbufs linked through the \fIm_next\fP field.
+Buffers containing access rights may be present within the chain
+if the underlying protocol supports passage of access rights.
+Record-oriented sockets, including datagram sockets,
+queue data as a list of packets; the sections of packets are distinguished
+by the types of the mbufs containing them.
+The mbufs which comprise a record are linked through the \fIm_next\fP field;
+records are linked from the \fIm_act\fP field of the first mbuf
+of one packet to the first mbuf of the next.
+Each packet begins with an mbuf containing the ``from'' address
+if the protocol provides it,
+then any buffers containing access rights, and finally any buffers
+containing data.
+If a record contains no data,
+no data buffers are required unless neither address nor access rights
+are present.
+.PP
+A socket queue has a number of flags used in synchronizing access
+to the data and in acquiring resources:
+.DS
+._d
+#define SB_LOCK 0x01 /* lock on data queue (so_rcv only) */
+#define SB_WANT 0x02 /* someone is waiting to lock */
+#define SB_WAIT 0x04 /* someone is waiting for data/space */
+#define SB_SEL 0x08 /* buffer is selected */
+#define SB_COLL 0x10 /* collision selecting */
+.DE
+The last two flags are manipulated by the system in implementing
+the select mechanism.
+.NH 3
+Socket connection queuing
+.PP
+In dealing with connection oriented sockets (e.g. SOCK_STREAM)
+the two ends are considered distinct. One end is termed
+\fIactive\fP, and generates connection requests. The other
+end is called \fIpassive\fP and accepts connection requests.
+.PP
+From the passive side, a socket is marked with
+SO_ACCEPTCONN when a \fIlisten\fP call is made,
+creating two queues of sockets: \fIso_q0\fP for connections
+in progress and \fIso_q\fP for connections already made and
+awaiting user acceptance.
+As a protocol is preparing incoming connections, it creates
+a socket structure queued on \fIso_q0\fP by calling the routine
+\fIsonewconn\fP(). When the connection
+is established, the socket structure is then transferred
+to \fIso_q\fP, making it available for an \fIaccept\fP.
+.PP
+If an SO_ACCEPTCONN socket is closed with sockets on either
+\fIso_q0\fP or \fIso_q\fP, these sockets are dropped,
+with notification to the peers as appropriate.
+.NH 2
+Protocol layer(s)
+.PP
+Each socket is created in a communications domain,
+which usually implies both an addressing structure (address family)
+and a set of protocols which implement various socket types within the domain
+(protocol family).
+Each domain is defined by the following structure:
+.DS
+.ta .5i +\w'struct 'u +\w'(*dom_externalize)(); 'u
+struct domain {
+ int dom_family; /* PF_xxx */
+ char *dom_name;
+ int (*dom_init)(); /* initialize domain data structures */
+ int (*dom_externalize)(); /* externalize access rights */
+ int (*dom_dispose)(); /* dispose of internalized rights */
+ struct protosw *dom_protosw, *dom_protoswNPROTOSW;
+ struct domain *dom_next;
+};
+.DE
+.PP
+At boot time, each domain configured into the kernel
+is added to a linked list of domains.
+The initialization procedure of each domain is then called.
+After that time, the domain structure is used to locate protocols
+within the protocol family.
+It may also contain procedure references
+for externalization of access rights at the receiving socket
+and the disposal of access rights that are not received.
+.PP
+Protocols are described by a set of entry points and certain
+socket-visible characteristics, some of which are used in
+deciding which socket type(s) they may support.
+.PP
+An entry in the ``protocol switch'' table exists for each
+protocol module configured into the system. It has the following form:
+.DS
+.ta .5i +\w'struct 'u +\w'domain *pr_domain; 'u
+struct protosw {
+ short pr_type; /* socket type used for */
+ struct domain *pr_domain; /* domain protocol a member of */
+ short pr_protocol; /* protocol number */
+ short pr_flags; /* socket visible attributes */
+/* protocol-protocol hooks */
+ int (*pr_input)(); /* input to protocol (from below) */
+ int (*pr_output)(); /* output to protocol (from above) */
+ int (*pr_ctlinput)(); /* control input (from below) */
+ int (*pr_ctloutput)(); /* control output (from above) */
+/* user-protocol hook */
+ int (*pr_usrreq)(); /* user request */
+/* utility hooks */
+ int (*pr_init)(); /* initialization routine */
+ int (*pr_fasttimo)(); /* fast timeout (200ms) */
+ int (*pr_slowtimo)(); /* slow timeout (500ms) */
+ int (*pr_drain)(); /* flush any excess space possible */
+};
+.DE
+.PP
+A protocol is called through the \fIpr_init\fP entry before any other.
+Thereafter it is called every 200 milliseconds through the
+\fIpr_fasttimo\fP entry and
+every 500 milliseconds through the \fIpr_slowtimo\fP for timer based actions.
+The system will call the \fIpr_drain\fP entry if it is low on space and
+this should throw away any non-critical data.
+.PP
+Protocols pass data between themselves as chains of mbufs using
+the \fIpr_input\fP and \fIpr_output\fP routines. \fIPr_input\fP
+passes data up (towards
+the user) and \fIpr_output\fP passes it down (towards the network); control
+information passes up and down on \fIpr_ctlinput\fP and \fIpr_ctloutput\fP.
+The protocol is responsible for the space occupied by any of the
+arguments to these entries and must either pass it onward or dispose of it.
+(On output, the lowest level reached must free buffers storing the arguments;
+on input, the highest level is responsible for freeing buffers.)
+.PP
+The \fIpr_usrreq\fP routine interfaces protocols to the socket
+code and is described below.
+.PP
+The \fIpr_flags\fP field is constructed from the following values:
+.DS
+.ta \w'#define 'u +\w'PR_CONNREQUIRED 'u +8n
+#define PR_ATOMIC 0x01 /* exchange atomic messages only */
+#define PR_ADDR 0x02 /* addresses given with messages */
+#define PR_CONNREQUIRED 0x04 /* connection required by protocol */
+#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */
+#define PR_RIGHTS 0x10 /* passes capabilities */
+.DE
+Protocols which are connection-based specify the PR_CONNREQUIRED
+flag so that the socket routines will never attempt to send data
+before a connection has been established. If the PR_WANTRCVD flag
+is set, the socket routines will notify the protocol when the user
+has removed data from the socket's receive queue. This allows
+the protocol to implement acknowledgement on user receipt, and
+also update windowing information based on the amount of space
+available in the receive queue. The PR_ADDR flag indicates that any
+data placed in the socket's receive queue will be preceded by the
+address of the sender. The PR_ATOMIC flag specifies that each \fIuser\fP
+request to send data must be performed in a single \fIprotocol\fP send
+request; it is the protocol's responsibility to maintain record
+boundaries on data to be sent. The PR_RIGHTS flag indicates that the
+protocol supports the passing of capabilities; this is currently
+used only by the protocols in the UNIX protocol family.
+.PP
+When a socket is created, the socket routines scan the protocol
+table for the domain
+looking for an appropriate protocol to support the type of
+socket being created. The \fIpr_type\fP field contains one of the
+possible socket types (e.g. SOCK_STREAM), while the \fIpr_domain\fP
+is a back pointer to the domain structure.
+The \fIpr_protocol\fP field contains the protocol number of the
+protocol, normally a well-known value.
+.NH 2
+Network-interface layer
+.PP
+Each network-interface configured into a system defines a
+path through which packets may be sent and received.
+Normally a hardware device is associated with this interface,
+though there is no requirement for this (for example, all
+systems have a software ``loopback'' interface used for
+debugging and performance analysis).
+In addition to manipulating the hardware device, an interface
+module is responsible
+for encapsulation and decapsulation of any link-layer header
+information required to deliver a message to its destination.
+The selection of which interface to use in delivering packets
+is a routing decision carried out at a
+higher level than the network-interface layer.
+An interface may have addresses in one or more address families.
+The address is set at boot time using an \fIioctl\fP on a socket
+in the appropriate domain; this operation is implemented by the protocol
+family, after verifying the operation through the device \fIioctl\fP entry.
+.PP
+An interface is defined by the following structure,
+.DS
+.ta .5i +\w'struct 'u +\w'ifaddr *if_addrlist; 'u
+struct ifnet {
+ char *if_name; /* name, e.g. ``en'' or ``lo'' */
+ short if_unit; /* sub-unit for lower level driver */
+ short if_mtu; /* maximum transmission unit */
+ short if_flags; /* up/down, broadcast, etc. */
+ short if_timer; /* time 'til if_watchdog called */
+ struct ifaddr *if_addrlist; /* list of addresses of interface */
+ struct ifqueue if_snd; /* output queue */
+ int (*if_init)(); /* init routine */
+ int (*if_output)(); /* output routine */
+ int (*if_ioctl)(); /* ioctl routine */
+ int (*if_reset)(); /* bus reset routine */
+ int (*if_watchdog)(); /* timer routine */
+ int if_ipackets; /* packets received on interface */
+ int if_ierrors; /* input errors on interface */
+ int if_opackets; /* packets sent on interface */
+ int if_oerrors; /* output errors on interface */
+ int if_collisions; /* collisions on csma interfaces */
+ struct ifnet *if_next;
+};
+.DE
+Each interface address has the following form:
+.DS
+.ta \w'#define 'u +\w'struct 'u +\w'struct 'u +\w'sockaddr ifa_addr; 'u-\w'struct 'u
+struct ifaddr {
+ struct sockaddr ifa_addr; /* address of interface */
+ union {
+ struct sockaddr ifu_broadaddr;
+ struct sockaddr ifu_dstaddr;
+ } ifa_ifu;
+ struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct ifaddr *ifa_next; /* next address for interface */
+};
+.ta \w'#define 'u +\w'ifa_broadaddr 'u +\w'ifa_ifu.ifu_broadaddr 'u
+#define ifa_broadaddr ifa_ifu.ifu_broadaddr /* broadcast address */
+#define ifa_dstaddr ifa_ifu.ifu_dstaddr /* other end of p-to-p link */
+.DE
+The protocol generally maintains this structure as part of a larger
+structure containing additional information concerning the address.
+.PP
+Each interface has a send queue and routines used for
+initialization, \fIif_init\fP, and output, \fIif_output\fP.
+If the interface resides on a system bus, the routine \fIif_reset\fP
+will be called after a bus reset has been performed.
+An interface may also
+specify a timer routine, \fIif_watchdog\fP;
+if \fIif_timer\fP is non-zero, it is decremented once per second
+until it reaches zero, at which time the watchdog routine is called.
+.PP
+The state of an interface and certain characteristics are stored in
+the \fIif_flags\fP field. The following values are possible:
+.DS
+._d
+#define IFF_UP 0x1 /* interface is up */
+#define IFF_BROADCAST 0x2 /* broadcast is possible */
+#define IFF_DEBUG 0x4 /* turn on debugging */
+#define IFF_LOOPBACK 0x8 /* is a loopback net */
+#define IFF_POINTOPOINT 0x10 /* interface is point-to-point link */
+#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
+#define IFF_RUNNING 0x40 /* resources allocated */
+#define IFF_NOARP 0x80 /* no address resolution protocol */
+.DE
+If the interface is connected to a network which supports transmission
+of \fIbroadcast\fP packets, the IFF_BROADCAST flag will be set and
+the \fIifa_broadaddr\fP field will contain the address to be used in
+sending or accepting a broadcast packet. If the interface is associated
+with a point-to-point hardware link (for example, a DEC DMR-11), the
+IFF_POINTOPOINT flag will be set and \fIifa_dstaddr\fP will contain the
+address of the host on the other side of the connection. These addresses
+and the local address of the interface, \fIifa_addr\fP, are used in
+filtering incoming packets. The interface sets IFF_RUNNING after
+it has allocated system resources and posted an initial read on the
+device it manages. This state bit is used to avoid multiple allocation
+requests when an interface's address is changed. The IFF_NOTRAILERS
+flag indicates the interface should refrain from using a \fItrailer\fP
+encapsulation on outgoing packets, or (where per-host negotiation
+of trailers is possible) that trailer encapsulations should not be requested;
+\fItrailer\fP protocols are described
+in section 14. The IFF_NOARP flag indicates the interface should not
+use an ``address resolution protocol'' in mapping internetwork addresses
+to local network addresses.
+.PP
+Various statistics are also stored in the interface structure. These
+may be viewed by users using the \fInetstat\fP(1) program.
+.PP
+The interface address and flags may be set with the SIOCSIFADDR and
+SIOCSIFFLAGS \fIioctl\fP\^s. SIOCSIFADDR is used initially to define each
+interface's address; SIOCSIFFLAGS can be used to mark
+an interface down and perform site-specific configuration.
+The destination address of a point-to-point link is set with SIOCSIFDSTADDR.
+Corresponding operations exist to read each value.
+Protocol families may also support operations to set and read the broadcast
+address.
+In addition, the SIOCGIFCONF \fIioctl\fP retrieves a list of interface
+names and addresses for all interfaces and protocols on the host.
+.NH 3
+UNIBUS interfaces
+.PP
+All hardware related interfaces currently reside on the UNIBUS.
+Consequently a common set of utility routines for dealing
+with the UNIBUS has been developed. Each UNIBUS interface
+utilizes a structure of the following form:
+.DS
+.ta \w'#define 'u +\w'ifw_xtofree 'u +\w'pte ifu_wmap[IF_MAXNUBAMR]; 'u
+struct ifubinfo {
+ short iff_uban; /* uba number */
+ short iff_hlen; /* local net header length */
+ struct uba_regs *iff_uba; /* uba regs, in vm */
+ short iff_flags; /* used during uballoc's */
+};
+.DE
+Additional structures are associated with each receive and transmit buffer,
+normally one each per interface; for read,
+.DS
+.ta \w'#define 'u +\w'ifw_xtofree 'u +\w'pte ifu_wmap[IF_MAXNUBAMR]; 'u
+struct ifrw {
+ caddr_t ifrw_addr; /* virt addr of header */
+ short ifrw_bdp; /* unibus bdp */
+ short ifrw_flags; /* type, etc. */
+#define IFRW_W 0x01 /* is a transmit buffer */
+ int ifrw_info; /* value from ubaalloc */
+ int ifrw_proto; /* map register prototype */
+ struct pte *ifrw_mr; /* base of map registers */
+};
+.DE
+and for write,
+.DS
+.ta \w'#define 'u +\w'ifw_xtofree 'u +\w'pte ifu_wmap[IF_MAXNUBAMR]; 'u
+struct ifxmt {
+ struct ifrw ifrw;
+ caddr_t ifw_base; /* virt addr of buffer */
+ struct pte ifw_wmap[IF_MAXNUBAMR]; /* base pages for output */
+ struct mbuf *ifw_xtofree; /* pages being dma'd out */
+ short ifw_xswapd; /* mask of clusters swapped */
+ short ifw_nmr; /* number of entries in wmap */
+};
+.ta \w'#define 'u +\w'ifw_xtofree 'u +\w'pte ifu_wmap[IF_MAXNUBAMR]; 'u
+#define ifw_addr ifrw.ifrw_addr
+#define ifw_bdp ifrw.ifrw_bdp
+#define ifw_flags ifrw.ifrw_flags
+#define ifw_info ifrw.ifrw_info
+#define ifw_proto ifrw.ifrw_proto
+#define ifw_mr ifrw.ifrw_mr
+.DE
+One of each of these structures is conveniently packaged for interfaces
+with single buffers for each direction, as follows:
+.DS
+.ta \w'#define 'u +\w'ifw_xtofree 'u +\w'pte ifu_wmap[IF_MAXNUBAMR]; 'u
+struct ifuba {
+ struct ifubinfo ifu_info;
+ struct ifrw ifu_r;
+ struct ifxmt ifu_xmt;
+};
+.ta \w'#define 'u +\w'ifw_xtofree 'u
+#define ifu_uban ifu_info.iff_uban
+#define ifu_hlen ifu_info.iff_hlen
+#define ifu_uba ifu_info.iff_uba
+#define ifu_flags ifu_info.iff_flags
+#define ifu_w ifu_xmt.ifrw
+#define ifu_xtofree ifu_xmt.ifw_xtofree
+.DE
+.PP
+The \fIifubinfo\fP structure contains the general information needed
+to characterize the I/O-mapped buffers for the device.
+In addition, there is a structure describing each buffer, including
+UNIBUS resources held by the interface.
+Sufficient memory pages and bus map registers are allocated to each buffer
+upon initialization according to the maximum packet size and header length.
+The kernel virtual address of the buffer is held in \fIifrw_addr\fP,
+and the map registers begin
+at \fIifrw_mr\fP. UNIBUS map register \fIifrw_mr\fP\^[\-1]
+maps the local network header
+ending on a page boundary. UNIBUS data paths are
+reserved for read and for
+write, given by \fIifrw_bdp\fP. The prototype of the map
+registers for read and for write is saved in \fIifrw_proto\fP.
+.PP
+When write transfers are not at least half-full pages on page boundaries,
+the data are just copied into the pages mapped on the UNIBUS
+and the transfer is started.
+If a write transfer is at least half a page long and on a page
+boundary, UNIBUS page table entries are swapped to reference
+the pages, and then the initial pages are
+remapped from \fIifw_wmap\fP when the transfer completes.
+The mbufs containing the mapped pages are placed on the \fIifw_xtofree\fP
+queue to be freed after transmission.
+.PP
+When read transfers give at least half a page of data to be input, page
+frames are allocated from a network page list and traded
+with the pages already containing the data, mapping the allocated
+pages to replace the input pages for the next UNIBUS data input.
+.PP
+The following utility routines are available for use in
+writing network interface drivers; all use the
+structures described above.
+.LP
+if_ubaminit(ifubinfo, uban, hlen, nmr, ifr, nr, ifx, nx);
+.br
+if_ubainit(ifuba, uban, hlen, nmr);
+.IP
+\fIif_ubaminit\fP allocates resources on UNIBUS adapter \fIuban\fP,
+storing the information in the \fIifubinfo\fP, \fIifrw\fP and \fIifxmt\fP
+structures referenced.
+The \fIifr\fP and \fIifx\fP parameters are pointers to arrays
+of \fIifrw\fP and \fIifxmt\fP structures whose dimensions
+are \fInr\fP and \fInx\fP, respectively.
+\fIif_ubainit\fP is a simpler, backwards-compatible interface used
+for hardware with single buffers of each type.
+They are called only at boot time or after a UNIBUS reset.
+One data path (buffered or unbuffered,
+depending on the \fIifu_flags\fP field) is allocated for each buffer.
+The \fInmr\fP parameter indicates
+the number of UNIBUS mapping registers required to map a maximal
+sized packet onto the UNIBUS, while \fIhlen\fP specifies the size
+of a local network header, if any, which should be mapped separately
+from the data (see the description of trailer protocols in section 14).
+Sufficient UNIBUS mapping registers and pages of memory are allocated
+to initialize the input data path for an initial read. For the output
+data path, mapping registers and pages of memory are also allocated
+and mapped onto the UNIBUS. The pages associated with the output
+data path are held in reserve in the event a write requires copying
+non-page-aligned data (see \fIif_wubaput\fP below).
+If \fIif_ubainit\fP is called with memory pages already allocated,
+they will be used instead of allocating new ones (this normally
+occurs after a UNIBUS reset).
+A 1 is returned when allocation and initialization are successful,
+0 otherwise.
+.LP
+m = if_ubaget(ifubinfo, ifr, totlen, off0, ifp);
+.br
+m = if_rubaget(ifuba, totlen, off0, ifp);
+.IP
+\fIif_ubaget\fP and \fIif_rubaget\fP pull input data
+out of an interface receive buffer and into an mbuf chain.
+The first interface passes pointers to the \fIifubinfo\fP structure
+for the interface and the \fIifrw\fP structure for the receive buffer;
+the second call may be used for single-buffered devices.
+\fItotlen\fP specifies the length of data to be obtained, not counting the
+local network header. If \fIoff0\fP is non-zero, it indicates
+a byte offset to a trailing local network header which should be
+copied into a separate mbuf and prepended to the front of the resultant mbuf
+chain. When the data amount to at least half a page,
+the previously mapped data pages are remapped
+into the mbufs and swapped with fresh pages, thus avoiding
+any copy.
+The receiving interface is recorded as \fIifp\fP, a pointer to an \fIifnet\fP
+structure, for the use of the receiving network protocol.
+A 0 return value indicates a failure to allocate resources.
+.LP
+if_ubaput(ifubinfo, ifx, m);
+.br
+if_wubaput(ifuba, m);
+.IP
+\fIif_ubaput\fP and \fIif_wubaput\fP map a chain of mbufs
+onto a network interface in preparation for output.
+The first interface is used by devices with multiple transmit buffers.
+The chain includes any local network
+header, which is copied so that it resides in the mapped and
+aligned I/O space.
+Page-sized data that are page-aligned in the output buffer
+are mapped to the UNIBUS in place of the normal buffer page,
+and the corresponding mbuf is placed on a queue to be freed after transmission.
+Any other mbufs which contained non-page-sized
+data portions are copied to the I/O space and then freed.
+Pages mapped from a previous output operation (no longer needed)
+are unmapped.
diff --git a/share/doc/smm/18.net/7.t b/share/doc/smm/18.net/7.t
new file mode 100644
index 0000000..8ccecc8
--- /dev/null
+++ b/share/doc/smm/18.net/7.t
@@ -0,0 +1,256 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)7.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.br
+.ne 30v
+.\".ds RH "Socket/protocol interface
+.NH
+\s+2Socket/protocol interface\s0
+.PP
+The interface between the socket routines and the communication
+protocols is through the \fIpr_usrreq\fP routine defined in the
+protocol switch table. The following requests to a protocol
+module are possible:
+.DS
+._d
+#define PRU_ATTACH 0 /* attach protocol */
+#define PRU_DETACH 1 /* detach protocol */
+#define PRU_BIND 2 /* bind socket to address */
+#define PRU_LISTEN 3 /* listen for connection */
+#define PRU_CONNECT 4 /* establish connection to peer */
+#define PRU_ACCEPT 5 /* accept connection from peer */
+#define PRU_DISCONNECT 6 /* disconnect from peer */
+#define PRU_SHUTDOWN 7 /* won't send any more data */
+#define PRU_RCVD 8 /* have taken data; more room now */
+#define PRU_SEND 9 /* send this data */
+#define PRU_ABORT 10 /* abort (fast DISCONNECT, DETACH) */
+#define PRU_CONTROL 11 /* control operations on protocol */
+#define PRU_SENSE 12 /* return status into m */
+#define PRU_RCVOOB 13 /* retrieve out of band data */
+#define PRU_SENDOOB 14 /* send out of band data */
+#define PRU_SOCKADDR 15 /* fetch socket's address */
+#define PRU_PEERADDR 16 /* fetch peer's address */
+#define PRU_CONNECT2 17 /* connect two sockets */
+/* begin for protocols internal use */
+#define PRU_FASTTIMO 18 /* 200ms timeout */
+#define PRU_SLOWTIMO 19 /* 500ms timeout */
+#define PRU_PROTORCV 20 /* receive from below */
+#define PRU_PROTOSEND 21 /* send to below */
+.DE
+A call on the user request routine is of the form,
+.DS
+._f
+error = (*protosw[].pr_usrreq)(so, req, m, addr, rights);
+int error; struct socket *so; int req; struct mbuf *m, *addr, *rights;
+.DE
+The mbuf data chain \fIm\fP is supplied for output operations
+and for certain other operations where it is to receive a result.
+The address \fIaddr\fP is supplied for address-oriented requests
+such as PRU_BIND and PRU_CONNECT.
+The \fIrights\fP parameter is an optional pointer to an mbuf
+chain containing user-specified capabilities (see the \fIsendmsg\fP
+and \fIrecvmsg\fP system calls). The protocol is responsible for
+disposal of the data mbuf chains on output operations.
+A non-zero return value gives a
+UNIX error number which should be passed to higher level software.
+The following paragraphs describe each
+of the requests possible.
+.IP PRU_ATTACH
+.br
+When a protocol is bound to a socket (with the \fIsocket\fP
+system call) the protocol module is called with this
+request. It is the responsibility of the protocol module to
+allocate any resources necessary.
+The ``attach'' request
+will always precede any of the other requests, and should not
+occur more than once.
+.IP PRU_DETACH
+.br
+This is the antithesis of the attach request, and is used
+at the time a socket is deleted. The protocol module may
+deallocate any resources assigned to the socket.
+.IP PRU_BIND
+.br
+When a socket is initially created it has no address bound
+to it. This request indicates that an address should be bound to
+an existing socket. The protocol module must verify that the
+requested address is valid and available for use.
+.IP PRU_LISTEN
+.br
+The ``listen'' request indicates the user wishes to listen
+for incoming connection requests on the associated socket.
+The protocol module should perform any state changes needed
+to carry out this request (if possible). A ``listen'' request
+always precedes a request to accept a connection.
+.IP PRU_CONNECT
+.br
+The ``connect'' request indicates the user wants to establish
+an association. The \fIaddr\fP parameter supplied describes
+the peer to be connected to. The effect of a connect request
+may vary depending on the protocol. Virtual circuit protocols,
+such as TCP [Postel81b], use this request to initiate establishment of a
+TCP connection. Datagram protocols, such as UDP [Postel80], simply
+record the peer's address in a private data structure and use
+it to tag all outgoing packets. There are no restrictions
+on how many times a connect request may be used after an attach.
+If a protocol supports the notion of \fImulti-casting\fP, it
+is possible to use multiple connects to establish a multi-cast
+group. Alternatively, an association may be broken by a
+PRU_DISCONNECT request, and a new association created with a
+subsequent connect request; all without destroying and creating
+a new socket.
+.IP PRU_ACCEPT
+.br
+Following a successful PRU_LISTEN request and the arrival
+of one or more connections, this request is made to
+indicate the user
+has accepted the first connection on the queue of
+pending connections. The protocol module should fill
+in the supplied address buffer with the address of the
+connected party.
+.IP PRU_DISCONNECT
+.br
+Eliminate an association created with a PRU_CONNECT request.
+.IP PRU_SHUTDOWN
+.br
+This call is used to indicate no more data will be sent and/or
+received (the \fIaddr\fP parameter indicates the direction of
+the shutdown, as encoded in the \fIsoshutdown\fP system call).
+The protocol may, at its discretion, deallocate any data
+structures related to the shutdown and/or notify a connected peer
+of the shutdown.
+.IP PRU_RCVD
+.br
+This request is made only if the protocol entry in the protocol
+switch table includes the PR_WANTRCVD flag.
+When a user removes data from the receive queue this request
+will be sent to the protocol module. It may be used to trigger
+acknowledgements, refresh windowing information, initiate
+data transfer, etc.
+.IP PRU_SEND
+.br
+Each user request to send data is translated into one or more
+PRU_SEND requests (a protocol may indicate that a single user
+send request must be translated into a single PRU_SEND request by
+specifying the PR_ATOMIC flag in its protocol description).
+The data to be sent is presented to the protocol as a list of
+mbufs and an address is, optionally, supplied in the \fIaddr\fP
+parameter. The protocol is responsible for preserving the data
+in the socket's send queue if it is not able to send it immediately,
+or if it may need it at some later time (e.g. for retransmission).
+.IP PRU_ABORT
+.br
+This request indicates an abnormal termination of service. The
+protocol should delete any existing association(s).
+.IP PRU_CONTROL
+.br
+The ``control'' request is generated when a user performs a
+UNIX \fIioctl\fP system call on a socket (and the ioctl is not
+intercepted by the socket routines). It allows protocol-specific
+operations to be provided outside the scope of the common socket
+interface. The \fIaddr\fP parameter contains a pointer to a static
+kernel data area where relevant information may be obtained or returned.
+The \fIm\fP parameter contains the actual \fIioctl\fP request code
+(note the non-standard calling convention).
+The \fIrights\fP parameter contains a pointer to an \fIifnet\fP structure
+if the \fIioctl\fP operation pertains to a particular network interface.
+.IP PRU_SENSE
+.br
+The ``sense'' request is generated when the user makes an \fIfstat\fP
+system call on a socket; it requests status of the associated socket.
+This currently returns a standard \fIstat\fP structure.
+It typically contains only the
+optimal transfer size for the connection (based on buffer size,
+windowing information and maximum packet size).
+The \fIm\fP parameter contains a pointer
+to a static kernel data area where the status buffer should be placed.
+.IP PRU_RCVOOB
+.br
+Any ``out-of-band'' data presently available is to be returned. An
+mbuf is passed to the protocol module, and the protocol
+should either place
+data in the mbuf or attach new mbufs to the one supplied if there is
+insufficient space in the single mbuf.
+An error may be returned if out-of-band data is not (yet) available
+or has already been consumed.
+The \fIaddr\fP parameter contains any options such as MSG_PEEK
+to examine data without consuming it.
+.IP PRU_SENDOOB
+.br
+Like PRU_SEND, but for out-of-band data.
+.IP PRU_SOCKADDR
+.br
+The local address of the socket is returned, if any is currently
+bound to it. The address (with protocol specific format) is returned
+in the \fIaddr\fP parameter.
+.IP PRU_PEERADDR
+.br
+The address of the peer to which the socket is connected is returned.
+The socket must be in a SS_ISCONNECTED state for this request to
+be made to the protocol. The address format (protocol specific) is
+returned in the \fIaddr\fP parameter.
+.IP PRU_CONNECT2
+.br
+The protocol module is supplied two sockets and requested to
+establish a connection between the two without binding any
+addresses, if possible. This call is used in implementing
+the
+.IR socketpair (2)
+system call.
+.PP
+The following requests are used internally by the protocol modules
+and are never generated by the socket routines. In certain instances,
+they are handed to the \fIpr_usrreq\fP routine solely for convenience
+in tracing a protocol's operation (e.g. PRU_SLOWTIMO).
+.IP PRU_FASTTIMO
+.br
+A ``fast timeout'' has occurred. This request is made when a timeout
+occurs in the protocol's \fIpr_fasttimo\fP routine. The \fIaddr\fP
+parameter indicates which timer expired.
+.IP PRU_SLOWTIMO
+.br
+A ``slow timeout'' has occurred. This request is made when a timeout
+occurs in the protocol's \fIpr_slowtimo\fP routine. The \fIaddr\fP
+parameter indicates which timer expired.
+.IP PRU_PROTORCV
+.br
+This request is used in the protocol-protocol interface, not by the
+socket routines. It requests reception of data destined for the protocol and
+not the user. No protocols currently use this facility.
+.IP PRU_PROTOSEND
+.br
+This request allows a protocol to send data destined for another
+protocol module, not a user. The details of how data is marked
+``addressed to protocol'' instead of ``addressed to user'' are
+left to the protocol modules. No protocols currently use this facility.
diff --git a/share/doc/smm/18.net/8.t b/share/doc/smm/18.net/8.t
new file mode 100644
index 0000000..e65e656
--- /dev/null
+++ b/share/doc/smm/18.net/8.t
@@ -0,0 +1,166 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)8.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Protocol/protocol interface
+.br
+.ne 2i
+.NH
+\s+2Protocol/protocol interface\s0
+.PP
+The interface between protocol modules is through the \fIpr_usrreq\fP,
+\fIpr_input\fP, \fIpr_output\fP, \fIpr_ctlinput\fP, and
+\fIpr_ctloutput\fP routines. The calling conventions for all
+but the \fIpr_usrreq\fP routine are expected to be specific to
+the protocol
+modules and are not guaranteed to be consistent across protocol
+families. We
+will examine the conventions used for some of the Internet
+protocols in this section as an example.
+.NH 2
+pr_output
+.PP
+The Internet protocol UDP uses the convention,
+.DS
+error = udp_output(inp, m);
+int error; struct inpcb *inp; struct mbuf *m;
+.DE
+where the \fIinp\fP, ``\fIin\fP\^ternet
+\fIp\fP\^rotocol \fIc\fP\^ontrol \fIb\fP\^lock'',
+passed between modules conveys per connection state information, and
+the mbuf chain contains the data to be sent. UDP
+performs consistency checks, appends its header, calculates a
+checksum, etc. before passing the packet on.
+UDP is based on the Internet Protocol, IP [Postel81a], as its transport.
+UDP passes a packet to the IP module for output as follows:
+.DS
+error = ip_output(m, opt, ro, flags);
+int error; struct mbuf *m, *opt; struct route *ro; int flags;
+.DE
+.PP
+The call to IP's output routine is more complicated than that for
+UDP, as befits the additional work the IP module must do.
+The \fIm\fP parameter is the data to be sent, and the \fIopt\fP
+parameter is an optional list of IP options which should
+be placed in the IP packet header. The \fIro\fP parameter is
+used in making routing decisions (and passing them back to the
+caller for use in subsequent calls). The
+final parameter, \fIflags\fP, contains flags indicating whether the
+user is allowed to transmit a broadcast packet
+and if routing is to be performed. The broadcast flag may
+be inconsequential if the underlying hardware does not support the
+notion of broadcasting.
+.PP
+All output routines return 0 on success and a UNIX error number
+if a failure occurred which could be detected immediately
+(no buffer space available, no route to destination, etc.).
+.NH 2
+pr_input
+.PP
+Both UDP and TCP use the following calling convention,
+.DS
+(void) (*protosw[].pr_input)(m, ifp);
+struct mbuf *m; struct ifnet *ifp;
+.DE
+Each mbuf list passed is a single packet to be processed by
+the protocol module.
+The interface from which the packet was received is passed as the second
+parameter.
+.PP
+The IP input routine is a VAX software interrupt level routine,
+and so is not called with any parameters. It instead communicates
+with network interfaces through a queue, \fIipintrq\fP, which is
+identical in structure to the queues used by the network interfaces
+for storing packets awaiting transmission.
+The software interrupt is enabled by the network interfaces
+when they place input data on the input queue.
+.NH 2
+pr_ctlinput
+.PP
+This routine is used to convey ``control'' information to a
+protocol module (i.e. information which might be passed to the
+user, but is not data).
+.PP
+The common calling convention for this routine is,
+.DS
+(void) (*protosw[].pr_ctlinput)(req, addr);
+int req; struct sockaddr *addr;
+.DE
+The \fIreq\fP parameter is one of the following,
+.DS
+.ta \w'#define 'u +\w'PRC_UNREACH_NEEDFRAG 'u +8n
+#define PRC_IFDOWN 0 /* interface transition */
+#define PRC_ROUTEDEAD 1 /* select new route if possible */
+#define PRC_QUENCH 4 /* someone said to slow down */
+#define PRC_MSGSIZE 5 /* message size forced drop */
+#define PRC_HOSTDEAD 6 /* normally from IMP */
+#define PRC_HOSTUNREACH 7 /* ditto */
+#define PRC_UNREACH_NET 8 /* no route to network */
+#define PRC_UNREACH_HOST 9 /* no route to host */
+#define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */
+#define PRC_UNREACH_PORT 11 /* bad port # */
+#define PRC_UNREACH_NEEDFRAG 12 /* IP_DF caused drop */
+#define PRC_UNREACH_SRCFAIL 13 /* source route failed */
+#define PRC_REDIRECT_NET 14 /* net routing redirect */
+#define PRC_REDIRECT_HOST 15 /* host routing redirect */
+#define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */
+#define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */
+#define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */
+#define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */
+#define PRC_PARAMPROB 20 /* header incorrect */
+.DE
+while the \fIaddr\fP parameter is the address to which the condition applies.
+Many of the requests have obviously been
+derived from ICMP (the Internet Control Message Protocol [Postel81c]),
+and from error messages defined in the 1822 host/IMP convention
+[BBN78]. Mapping tables exist to convert
+control requests to UNIX error codes which are delivered
+to a user.
+.NH 2
+pr_ctloutput
+.PP
+This is the routine that implements per-socket options at the protocol
+level for \fIgetsockopt\fP and \fIsetsockopt\fP.
+The calling convention is,
+.DS
+error = (*protosw[].pr_ctloutput)(op, so, level, optname, mp);
+int op; struct socket *so; int level, optname; struct mbuf **mp;
+.DE
+where \fIop\fP is one of PRCO_SETOPT or PRCO_GETOPT,
+\fIso\fP is the socket from whence the call originated,
+and \fIlevel\fP and \fIoptname\fP are the protocol level and option name
+supplied by the user.
+The results of a PRCO_GETOPT call are returned in an mbuf whose address
+is placed in \fImp\fP before return.
+On a PRCO_SETOPT call, \fImp\fP contains the address of an mbuf
+containing the option data; the mbuf should be freed before return.
diff --git a/share/doc/smm/18.net/9.t b/share/doc/smm/18.net/9.t
new file mode 100644
index 0000000..506037a
--- /dev/null
+++ b/share/doc/smm/18.net/9.t
@@ -0,0 +1,124 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)9.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Protocol/network-interface
+.br
+.ne 2i
+.NH
+\s+2Protocol/network-interface interface\s0
+.PP
+The lowest layer in the set of protocols which comprise a
+protocol family must interface itself to one or more network
+interfaces in order to transmit and receive
+packets. It is assumed that
+any routing decisions have been made before handing a packet
+to a network interface; in fact, this is absolutely necessary
+in order to locate any interface at all (unless, of course,
+one uses a single ``hardwired'' interface). There are two
+cases with which to be concerned, transmission of a packet
+and receipt of a packet; each will be considered separately.
+.NH 2
+Packet transmission
+.PP
+Assuming a protocol has a handle on an interface, \fIifp\fP,
+a (struct ifnet\ *),
+it transmits a fully formatted packet with the following call,
+.DS
+error = (*ifp->if_output)(ifp, m, dst)
+int error; struct ifnet *ifp; struct mbuf *m; struct sockaddr *dst;
+.DE
+The output routine for the network interface transmits the packet
+\fIm\fP to the \fIdst\fP address, or returns an error indication
+(a UNIX error number). In reality transmission may
+not be immediate or successful; normally the output
+routine simply queues the packet on its send queue and primes
+an interrupt driven routine to actually transmit the packet.
+For unreliable media, such as the Ethernet, ``successful''
+transmission simply means that the packet has been placed on the cable
+without a collision. On the other hand, an 1822 interface guarantees
+proper delivery or an error indication for each message transmitted.
+The model employed in the networking system attaches no promises
+of delivery to the packets handed to a network interface, and thus
+corresponds more closely to the Ethernet. Errors returned by the
+output routine are only those that can be detected immediately,
+and are normally trivial in nature (no buffer space,
+address format not handled, etc.).
+No indication is received if errors are detected after the call has returned.
+.NH 2
+Packet reception
+.PP
+Each protocol family must have one or more ``lowest level'' protocols.
+These protocols deal with internetwork addressing and are responsible
+for the delivery of incoming packets to the proper protocol processing
+modules. In the PUP model [Boggs78] these protocols are termed Level
+1 protocols,
+in the ISO model, network layer protocols. In this system each such
+protocol module has an input packet queue assigned to it. Incoming
+packets received by a network interface are queued for the protocol
+module, and a VAX software interrupt is posted to initiate processing.
+.PP
+Four macros are available for queuing and dequeuing packets:
+.IP "IF_ENQUEUE(ifq, m)"
+.br
+This places the packet \fIm\fP at the tail of the queue \fIifq\fP.
+.IP "IF_DEQUEUE(ifq, m)"
+.br
+This places a pointer to the packet at the head of queue \fIifq\fP
+in \fIm\fP
+and removes the packet from the queue.
+A zero value will be returned in \fIm\fP if the queue is empty.
+.IP "IF_DEQUEUEIF(ifq, m, ifp)"
+.br
+Like IF_DEQUEUE, this removes the next packet from the head of a queue
+and returns it in \fIm\fP.
+A pointer to the interface on which the packet was received
+is placed in \fIifp\fP, a (struct ifnet\ *).
+.IP "IF_PREPEND(ifq, m)"
+.br
+This places the packet \fIm\fP at the head of the queue \fIifq\fP.
+.PP
+Each queue has a maximum length associated with it as a simple form
+of congestion control. The macro IF_QFULL(ifq) returns 1 if the queue
+is filled, in which case the macro IF_DROP(ifq) should be used to
+increment the count of the number of packets dropped, and the offending
+packet is dropped. For example, the following code fragment is commonly
+found in a network interface's input routine,
+.DS
+._f
+if (IF_QFULL(inq)) {
+ IF_DROP(inq);
+ m_freem(m);
+} else
+ IF_ENQUEUE(inq, m);
+.DE
diff --git a/share/doc/smm/18.net/Makefile b/share/doc/smm/18.net/Makefile
new file mode 100644
index 0000000..fde100a
--- /dev/null
+++ b/share/doc/smm/18.net/Makefile
@@ -0,0 +1,10 @@
+# @(#)Makefile 8.1 (Berkeley) 6/10/93
+
+DIR= smm/18.net
+SRCS= 0.t 1.t 2.t 3.t 4.t 5.t 6.t 7.t 8.t 9.t a.t b.t c.t d.t e.t f.t
+MACROS= -ms
+
+paper.ps: ${SRCS}
+ ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
+
+.include <bsd.doc.mk>
diff --git a/share/doc/smm/18.net/a.t b/share/doc/smm/18.net/a.t
new file mode 100644
index 0000000..dddba57
--- /dev/null
+++ b/share/doc/smm/18.net/a.t
@@ -0,0 +1,219 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)a.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Gateways and routing
+.br
+.ne 2i
+.NH
+\s+2Gateways and routing issues\s0
+.PP
+The system has been designed with the expectation that it will
+be used in an internetwork environment. The ``canonical''
+environment was envisioned to be a collection of local area
+networks connected at one or more points through hosts with
+multiple network interfaces (one on each local area network),
+and possibly a connection to a long haul network (for example,
+the ARPANET). In such an environment, issues of
+gatewaying and packet routing become very important. Certain
+of these issues, such as congestion
+control, have been handled in a simplistic manner or specifically
+not addressed.
+Instead, where possible, the network system
+attempts to provide simple mechanisms upon which more involved
+policies may be implemented. As some of these problems become
+better understood, the solutions developed will be incorporated
+into the system.
+.PP
+This section will describe the facilities provided for packet
+routing. The simplistic mechanisms provided for congestion
+control are described in chapter 12.
+.NH 2
+Routing tables
+.PP
+The network system maintains a set of routing tables for
+selecting a network interface to use in delivering a
+packet to its destination. These tables are of the form:
+.DS
+.ta \w'struct 'u +\w'u_long 'u +\w'sockaddr rt_gateway; 'u
+struct rtentry {
+ u_long rt_hash; /* hash key for lookups */
+ struct sockaddr rt_dst; /* destination net or host */
+ struct sockaddr rt_gateway; /* forwarding agent */
+ short rt_flags; /* see below */
+ short rt_refcnt; /* no. of references to structure */
+ u_long rt_use; /* packets sent using route */
+ struct ifnet *rt_ifp; /* interface to give packet to */
+};
+.DE
+.PP
+The routing information is organized in two separate tables, one
+for routes to a host and one for routes to a network. The
+distinction between hosts and networks is necessary so
+that a single mechanism may be used
+for both broadcast and multi-drop type networks, and
+also for networks built from point-to-point links
+(e.g. DECnet [DEC80]).
+.PP
+Each table is organized as a hashed set of linked lists.
+Two 32-bit hash values are calculated by routines defined for
+each address family; one based on the destination being
+a host, and one assuming the target is the network portion
+of the address. Each hash value is used to
+locate a hash chain to search (by taking the value modulo the
+hash table size) and the entire 32-bit value is then
+used as a key in scanning the list of routes. Lookups are
+applied first to the routing
+table for hosts, then to the routing table for networks.
+If both lookups fail, a final lookup is made for a ``wildcard''
+route (by convention, network 0).
+The first appropriate route discovered is used.
+By doing this, routes to a specific host on a network may be
+present as well as routes to the network. This also allows a
+``fall back'' network route to be defined to a ``smart'' gateway
+which may then perform more intelligent routing.
+.PP
+Each routing table entry contains a destination (the desired final destination),
+a gateway to which to send the packet,
+and various flags which indicate the route's status and type (host or
+network). A count
+of the number of packets sent using the route is kept, along
+with a count of ``held references'' to the dynamically
+allocated structure to ensure that memory reclamation
+occurs only when the route is not in use. Finally, a pointer to
+a network interface is kept; packets sent using
+the route should be handed to this interface.
+.PP
+Routes are typed in two ways: either as host or network, and as
+``direct'' or ``indirect''. The host/network
+distinction determines how to compare the \fIrt_dst\fP field
+during lookup. If the route is to a network, only a packet's
+destination network is compared to the \fIrt_dst\fP entry stored
+in the table. If the route is to a host, the addresses must
+match bit for bit.
+.PP
+The distinction between ``direct'' and ``indirect'' routes indicates
+whether the destination is directly connected to the source.
+This is needed when performing local network encapsulation. If
+a packet is destined for a peer at a host or network which is
+not directly connected to the source, the internetwork packet
+header will
+contain the address of the eventual destination, while
+the local network header will address the intervening
+gateway. Should the destination be directly connected, these addresses
+are likely to be identical, or a mapping between the two exists.
+The RTF_GATEWAY flag indicates that the route is to an ``indirect''
+gateway agent, and that the local network header should be filled in
+from the \fIrt_gateway\fP field instead of
+from the final internetwork destination address.
+.PP
+It is assumed that multiple routes to the same destination will not
+be present; only one of multiple routes, that most recently installed,
+will be used.
+.PP
+Routing redirect control messages are used to dynamically
+modify existing routing table entries as well as dynamically
+create new routing table entries. On hosts where exhaustive
+routing information is too expensive to maintain (e.g. work
+stations), the
+combination of wildcard routing entries and routing redirect
+messages can be used to provide a simple routing management
+scheme without the use of a higher level policy process.
+Current connections may be rerouted after notification of the protocols
+by means of their \fIpr_ctlinput\fP entries.
+Statistics are kept by the routing table routines
+on the use of routing redirect messages and their
+effect on the routing tables. These statistics may be viewed using
+.IR netstat (1).
+.PP
+Status information other than routing redirect control messages
+may be used in the future, but at present they are ignored.
+Likewise, more intelligent ``metrics'' may be used to describe
+routes in the future, possibly based on bandwidth and monetary
+costs.
+.NH 2
+Routing table interface
+.PP
+A protocol accesses the routing tables through
+three routines,
+one to allocate a route, one to free a route, and one
+to process a routing redirect control message.
+The routine \fIrtalloc\fP performs route allocation; it is
+called with a pointer to the following structure containing
+the desired destination:
+.DS
+._f
+struct route {
+ struct rtentry *ro_rt;
+ struct sockaddr ro_dst;
+};
+.DE
+The route returned is assumed ``held'' by the caller until
+released with an \fIrtfree\fP call. Protocols which implement
+virtual circuits, such as TCP, hold onto routes for the duration
+of the circuit's lifetime, while connection-less protocols,
+such as UDP, allocate and free routes whenever their destination address
+changes.
+.PP
+The routine \fIrtredirect\fP is called to process a routing redirect
+control message. It is called with a destination address,
+the new gateway to that destination, and the source of the redirect.
+Redirects are accepted only from the current router for the destination.
+If a non-wildcard route
+exists to the destination, the gateway entry in the route is modified
+to point at the new gateway supplied. Otherwise, a new routing
+table entry is inserted reflecting the information supplied. Routes
+to interfaces and routes to gateways which are not directly accessible
+from the host are ignored.
+.NH 2
+User level routing policies
+.PP
+Routing policies implemented in user processes manipulate the
+kernel routing tables through two \fIioctl\fP calls. The
+commands SIOCADDRT and SIOCDELRT add and delete routing entries,
+respectively; the tables are read through the /dev/kmem device.
+The decision to place policy decisions in a user process implies
+that routing table updates may lag a bit behind the identification of
+new routes, or the failure of existing routes, but this period
+of instability is normally very small with proper implementation
+of the routing process. Advisory information, such as ICMP
+error messages and IMP diagnostic messages, may be read from
+raw sockets (described in the next section).
+.PP
+Several routing policy processes have already been implemented. The
+system standard
+``routing daemon'' uses a variant of the Xerox NS Routing Information
+Protocol [Xerox81] to maintain up-to-date routing tables in our local
+environment. Interaction with other existing routing protocols,
+such as the Internet EGP (Exterior Gateway Protocol), has been
+accomplished using a similar process.
diff --git a/share/doc/smm/18.net/b.t b/share/doc/smm/18.net/b.t
new file mode 100644
index 0000000..2e39a8a
--- /dev/null
+++ b/share/doc/smm/18.net/b.t
@@ -0,0 +1,145 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)b.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Raw sockets
+.br
+.ne 2i
+.NH
+\s+2Raw sockets\s0
+.PP
+A raw socket is an object which allows users direct access
+to a lower-level protocol. Raw sockets are intended for knowledgeable
+processes which wish to take advantage of some protocol
+feature not directly accessible through the normal interface, or
+for the development of new protocols built atop existing lower level
+protocols. For example, a new version of TCP might be developed at the
+user level by utilizing a raw IP socket for delivery of packets.
+The raw IP socket interface attempts to provide an identical interface
+to the one a protocol would have if it were resident in the kernel.
+.PP
+The raw socket support is built around a generic raw socket interface,
+(possibly) augmented by protocol-specific processing routines.
+This section will describe the core of the raw socket interface.
+.NH 2
+Control blocks
+.PP
+Every raw socket has a protocol control block of the following form:
+.DS
+.ta \w'struct 'u +\w'caddr_t 'u +\w'sockproto rcb_proto; 'u
+struct rawcb {
+ struct rawcb *rcb_next; /* doubly linked list */
+ struct rawcb *rcb_prev;
+ struct socket *rcb_socket; /* back pointer to socket */
+ struct sockaddr rcb_faddr; /* destination address */
+ struct sockaddr rcb_laddr; /* socket's address */
+ struct sockproto rcb_proto; /* protocol family, protocol */
+ caddr_t rcb_pcb; /* protocol specific stuff */
+ struct mbuf *rcb_options; /* protocol specific options */
+ struct route rcb_route; /* routing information */
+ short rcb_flags;
+};
+.DE
+All the control blocks are kept on a doubly linked list for
+performing lookups during packet dispatch. Associations may
+be recorded in the control block and used by the output routine
+in preparing packets for transmission.
+The \fIrcb_proto\fP structure contains the protocol family and protocol
+number with which the raw socket is associated.
+The protocol, family and addresses are
+used to filter packets on input; this will be described in more
+detail shortly. If any protocol-specific information is required,
+it may be attached to the control block using the \fIrcb_pcb\fP
+field.
+Protocol-specific options for transmission in outgoing packets
+may be stored in \fIrcb_options\fP.
+.PP
+A raw socket interface is datagram oriented. That is, each send
+or receive on the socket requires a destination address. This
+address may be supplied by the user or stored in the control block
+and automatically installed in the outgoing packet by the output
+routine. Since it is not possible to determine whether an address
+is present or not in the control block, two flags, RAW_LADDR and
+RAW_FADDR, indicate if a local and foreign address are present.
+Routing is expected to be performed by the underlying protocol
+if necessary.
+.NH 2
+Input processing
+.PP
+Input packets are ``assigned'' to raw sockets based on a simple
+pattern matching scheme. Each network interface or protocol
+gives unassigned packets
+to the raw input routine with the call:
+.DS
+raw_input(m, proto, src, dst)
+struct mbuf *m; struct sockproto *proto; struct sockaddr *src, *dst;
+.DE
+The data packet then has a generic header prepended to it of the
+form
+.DS
+._f
+struct raw_header {
+ struct sockproto raw_proto;
+ struct sockaddr raw_dst;
+ struct sockaddr raw_src;
+};
+.DE
+and it is placed in a packet queue for the ``raw input protocol'' module.
+Packets taken from this queue are copied into any raw sockets that
+match the header according to the following rules:
+.IP 1)
+The protocol family of the socket and header agree.
+.IP 2)
+If the protocol number in the socket is non-zero, then it agrees
+with that found in the packet header.
+.IP 3)
+If a local address is defined for the socket, the address format
+of the local address is the same as the destination address's and
+the two addresses agree bit for bit.
+.IP 4)
+The rules of 3) are applied to the socket's foreign address and the packet's
+source address.
+.LP
+A basic assumption is that addresses present in the
+control block and packet header (as constructed by the network
+interface and any raw input protocol module) are in a canonical
+form which may be ``block compared''.
+.NH 2
+Output processing
+.PP
+On output the raw \fIpr_usrreq\fP routine
+passes the packet and a pointer to the raw control block to the
+raw protocol output routine for any processing required before
+it is delivered to the appropriate network interface. The
+output routine is normally the only code required to implement
+a raw socket interface.
diff --git a/share/doc/smm/18.net/c.t b/share/doc/smm/18.net/c.t
new file mode 100644
index 0000000..2c7f752
--- /dev/null
+++ b/share/doc/smm/18.net/c.t
@@ -0,0 +1,151 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)c.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Buffering and congestion control
+.br
+.ne 2i
+.NH
+\s+2Buffering and congestion control\s0
+.PP
+One of the major factors in the performance of a protocol is
+the buffering policy used. Lack of a proper buffering policy
+can force packets to be dropped, cause falsified windowing
+information to be emitted by protocols, fragment host memory,
+degrade the overall host performance, etc. Due to problems
+such as these, most systems allocate a fixed pool of memory
+to the networking system and impose
+a policy optimized for ``normal'' network operation.
+.PP
+The networking system developed for UNIX is little different in this
+respect. At boot time a fixed amount of memory is allocated by
+the networking system. At later times more system memory
+may be requested as the need arises, but at no time is
+memory ever returned to the system. It is possible to
+garbage collect memory from the network, but difficult. In
+order to perform this garbage collection properly, some
+portion of the network will have to be ``turned off'' as
+data structures are updated. The interval over which this
+occurs must be kept small compared to the average inter-packet
+arrival time, or too much traffic may
+be lost, impacting other hosts on the network, as well as
+increasing load on the interconnecting mediums. In our
+environment we have not experienced a need for such compaction,
+and thus have left the problem unresolved.
+.PP
+The mbuf structure was introduced in chapter 5. In this
+section a brief description will be given of the allocation
+mechanisms, and policies used by the protocols in performing
+connection level buffering.
+.NH 2
+Memory management
+.PP
+The basic memory allocation routines manage a private page map,
+the size of which determines the maximum amount of memory
+that may be allocated by the network.
+A small amount of memory is allocated at boot time
+to initialize the mbuf and mbuf page cluster free lists.
+When the free lists are exhausted, more memory is requested
+from the system memory allocator if space remains in the map.
+If memory cannot be allocated,
+callers may block awaiting free memory,
+or the failure may be reflected to the caller immediately.
+The allocator will not block awaiting free map entries, however,
+as exhaustion of the page map usually indicates that buffers have been lost
+due to a ``leak.''
+The private page table is used by the network buffer management
+routines in remapping pages to
+be logically contiguous as the need arises. In addition, an
+array of reference counts parallels the page table and is used
+when multiple references to a page are present.
+.PP
+Mbufs are 128 byte structures, 8 fitting in a 1Kbyte
+page of memory. When data is placed in mbufs,
+it is copied or remapped into logically contiguous pages of
+memory from the network page pool if possible.
+Data smaller than half of the size
+of a page is copied into one or more 112 byte mbuf data areas.
+.NH 2
+Protocol buffering policies
+.PP
+Protocols reserve fixed amounts of
+buffering for send and receive queues at socket creation time. These
+amounts define the high and low water marks used by the socket routines
+in deciding when to block and unblock a process. The reservation
+of space does not currently
+result in any action by the memory management
+routines.
+.PP
+Protocols which provide connection level flow control do this
+based on the amount of space in the associated socket queues. That
+is, send windows are calculated based on the amount of free space
+in the socket's receive queue, while receive windows are adjusted
+based on the amount of data awaiting transmission in the send queue.
+Care has been taken to avoid the ``silly window syndrome'' described
+in [Clark82] at both the sending and receiving ends.
+.NH 2
+Queue limiting
+.PP
+Incoming packets from the network are always received unless
+memory allocation fails. However, each Level 1 protocol
+input queue
+has an upper bound on the queue's length, and any packets
+exceeding that bound are discarded. It is possible for a host to be
+overwhelmed by excessive network traffic (for instance a host
+acting as a gateway from a high bandwidth network to a low bandwidth
+network). As a ``defensive'' mechanism the queue limits may be
+adjusted to throttle network traffic load on a host.
+Consider a host willing to devote some percentage of
+its machine to handling network traffic.
+If the cost of handling an
+incoming packet can be calculated so that an acceptable
+``packet handling rate''
+can be determined, then input queue lengths may be dynamically
+adjusted based on a host's network load and the number of packets
+awaiting processing. Obviously, discarding packets is
+not a satisfactory solution to a problem such as this
+(simply dropping packets is likely to increase the load on a network);
+the queue lengths were incorporated mainly as a safeguard mechanism.
+.NH 2
+Packet forwarding
+.PP
+When packets can not be forwarded because of memory limitations,
+the system attempts to generate a ``source quench'' message. In addition,
+any other problems encountered during packet forwarding are also
+reflected back to the sender in the form of ICMP packets. This
+helps hosts avoid unneeded retransmissions.
+.PP
+Broadcast packets are never forwarded due to possible dire
+consequences. In an early stage of network development, broadcast
+packets were forwarded and a ``routing loop'' resulted in network
+saturation and every host on the network crashing.
diff --git a/share/doc/smm/18.net/d.t b/share/doc/smm/18.net/d.t
new file mode 100644
index 0000000..675bece
--- /dev/null
+++ b/share/doc/smm/18.net/d.t
@@ -0,0 +1,73 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)d.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Out of band data
+.br
+.ne 2i
+.NH
+\s+2Out of band data\s0
+.PP
+Out of band data is a facility peculiar to the stream socket
+abstraction defined. Little agreement appears to exist as
+to what its semantics should be. TCP defines the notion of
+``urgent data'' as in-line, while the NBS protocols [Burruss81]
+and numerous others provide a fully independent logical
+transmission channel along which out of band data is to be
+sent.
+In addition, the amount of the data which may be sent as an out
+of band message varies from protocol to protocol; everything
+from 1 bit to 16 bytes or more.
+.PP
+A stream socket's notion of out of band data has been defined
+as the lowest reasonable common denominator (at least reasonable
+in our minds);
+clearly this is subject to debate. Out of band data is expected
+to be transmitted out of the normal sequencing and flow control
+constraints of the data stream. A minimum of 1 byte of out of
+band data and one outstanding out of band message are expected to
+be supported by the protocol supporting a stream socket.
+It is a protocol's prerogative to support larger-sized messages, or
+more than one outstanding out of band message at a time.
+.PP
+Out of band data is maintained by the protocol and is usually not
+stored in the socket's receive queue.
+A socket-level option, SO_OOBINLINE,
+is provided to force out-of-band data to be placed in the normal
+receive queue when urgent data is received;
+this sometimes ameliorates problems due to loss of data
+when multiple out-of-band
+segments are received before the first has been passed to the user.
+The PRU_SENDOOB and PRU_RCVOOB
+requests to the \fIpr_usrreq\fP routine are used in sending and
+receiving data.
diff --git a/share/doc/smm/18.net/e.t b/share/doc/smm/18.net/e.t
new file mode 100644
index 0000000..77e8a2a
--- /dev/null
+++ b/share/doc/smm/18.net/e.t
@@ -0,0 +1,129 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)e.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH "Trailer protocols
+.br
+.ne 2i
+.NH
+\s+2Trailer protocols\s0
+.PP
+Core to core copies can be expensive.
+Consequently, a great deal of effort was spent
+in minimizing such operations. The VAX architecture
+provides virtual memory hardware organized in
+page units. To cut down on copy operations, data
+is kept in page-sized units on page-aligned
+boundaries whenever possible. This allows data
+to be moved in memory simply by remapping the page
+instead of copying. The mbuf and network
+interface routines perform page table manipulations
+where needed, hiding the complexities of the VAX
+virtual memory hardware from higher level code.
+.PP
+Data enters the system in two ways: from the user,
+or from the network (hardware interface). When data
+is copied from the user's address space
+into the system it is deposited in pages (if sufficient
+data is present).
+This encourages the user to transmit information in
+messages which are a multiple of the system page size.
+.PP
+Unfortunately, performing a similar operation when taking
+data from the network is very difficult.
+Consider the format of an incoming packet. A packet
+usually contains a local network header followed by
+one or more headers used by the high level protocols.
+Finally, the data, if any, follows these headers. Since
+the header information may be variable length, DMA'ing the eventual
+data for the user into a page aligned area of
+memory is impossible without
+\fIa priori\fP knowledge of the format (e.g., by supporting
+only a single protocol header format).
+.PP
+To allow variable length header information to
+be present and still ensure page alignment of data,
+a special local network encapsulation may be used.
+This encapsulation, termed a \fItrailer protocol\fP [Leffler84],
+places the variable length header information after
+the data. A fixed size local network
+header is then prepended to the resultant packet.
+The local network header contains the size of the
+data portion (in units of 512 bytes), and a new \fItrailer protocol
+header\fP, inserted before the variable length
+information, contains the size of the variable length
+header information. The following trailer
+protocol header is used to store information
+regarding the variable length protocol header:
+.DS
+._f
+struct {
+ short protocol; /* original protocol no. */
+ short length; /* length of trailer */
+};
+.DE
+.PP
+The processing of the trailer protocol is very
+simple. On output, the local network header indicates that
+a trailer encapsulation is being used.
+The header also includes an indication
+of the number of data pages present before the trailer
+protocol header. The trailer protocol header is
+initialized to contain the actual protocol identifier and the
+variable length header size, and is appended to the data
+along with the variable length header information.
+.PP
+On input, the interface routines identify the
+trailer encapsulation
+by the protocol type stored in the local network header,
+then calculate the number of
+pages of data to find the beginning of the trailer.
+The trailing information is copied into a separate
+mbuf and linked to the front of the resultant packet.
+.PP
+Clearly, trailer protocols require cooperation between
+source and destination. In addition, they are normally
+cost effective only when sizable packets are used. The
+current scheme works because the local network encapsulation
+header is a fixed size, allowing DMA operations
+to be performed at a known offset from the first data page
+being received. Should the local network header be
+variable length this scheme fails.
+.PP
+Statistics collected indicate that as much as 200Kb/s
+can be gained by using a trailer protocol with
+1Kbyte packets. The average size of the variable
+length header was 40 bytes (the size of a
+minimal TCP/IP packet header). If hardware
+supports larger sized packets, even greater gains
+may be realized.
diff --git a/share/doc/smm/18.net/f.t b/share/doc/smm/18.net/f.t
new file mode 100644
index 0000000..18995fd
--- /dev/null
+++ b/share/doc/smm/18.net/f.t
@@ -0,0 +1,117 @@
+.\" Copyright (c) 1983, 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)f.t 8.1 (Berkeley) 6/8/93
+.\"
+.nr H2 1
+.\".ds RH Acknowledgements
+.br
+.ne 2i
+.SH
+\s+2Acknowledgements\s0
+.PP
+The internal structure of the system is patterned
+after the Xerox PUP architecture [Boggs79], while in certain
+places the Internet
+protocol family has had a great deal of influence in the design.
+The use of software interrupts for process invocation
+is based on similar facilities found in
+the VMS operating system.
+Many of the
+ideas related to protocol modularity, memory management, and network
+interfaces are based on Rob Gurwitz's TCP/IP implementation for the
+4.1BSD version of UNIX on the VAX [Gurwitz81].
+Greg Chesson explained his use of trailer encapsulations in Datakit,
+instigating their use in our system.
+.\".ds RH References
+.nr H2 1
+.sp 2
+.ne 2i
+.SH
+\s+2References\s0
+.LP
+.IP [Boggs79] 20
+Boggs, D. R., J. F. Shoch, E. A. Taft, and R. M. Metcalfe;
+\fIPUP: An Internetwork Architecture\fP. Report CSL-79-10.
+XEROX Palo Alto Research Center, July 1979.
+.IP [BBN78] 20
+Bolt Beranek and Newman;
+Specification for the Interconnection of Host and IMP.
+BBN Technical Report 1822. May 1978.
+.IP [Cerf78] 20
+Cerf, V. G.; The Catenet Model for Internetworking.
+Internet Working Group, IEN 48. July 1978.
+.IP [Clark82] 20
+Clark, D. D.; Window and Acknowledgement Strategy in TCP, RFC-813.
+Network Information Center, SRI International. July 1982.
+.IP [DEC80] 20
+Digital Equipment Corporation; \fIDECnet DIGITAL Network
+Architecture \- General Description\fP. Order No.
+AA-K179A-TK. October 1980.
+.IP [Gurwitz81] 20
+Gurwitz, R. F.; VAX-UNIX Networking Support Project \- Implementation
+Description. Internetwork Working Group, IEN 168.
+January 1981.
+.IP [ISO81] 20
+International Organization for Standardization.
+\fIISO Open Systems Interconnection \- Basic Reference Model\fP.
+ISO/TC 97/SC 16 N 719. August 1981.
+.IP [Joy86] 20
+Joy, W.; Fabry, R.; Leffler, S.; McKusick, M.; and Karels, M.;
+Berkeley Software Architecture Manual, 4.4BSD Edition.
+\fIUNIX Programmer's Supplementary Documents\fP, Vol. 1 (PSD:5).
+Computer Systems Research Group,
+University of California, Berkeley.
+May, 1986.
+.IP [Leffler84] 20
+Leffler, S.J. and Karels, M.J.; Trailer Encapsulations, RFC-893.
+Network Information Center, SRI International.
+April 1984.
+.IP [Postel80] 20
+Postel, J. User Datagram Protocol, RFC-768.
+Network Information Center, SRI International. May 1980.
+.IP [Postel81a] 20
+Postel, J., ed. Internet Protocol, RFC-791.
+Network Information Center, SRI International. September 1981.
+.IP [Postel81b] 20
+Postel, J., ed. Transmission Control Protocol, RFC-793.
+Network Information Center, SRI International. September 1981.
+.IP [Postel81c] 20
+Postel, J. Internet Control Message Protocol, RFC-792.
+Network Information Center, SRI International. September 1981.
+.IP [Xerox81] 20
+Xerox Corporation. \fIInternet Transport Protocols\fP.
+Xerox System Integration Standard 028112. December 1981.
+.IP [Zimmermann80] 20
+Zimmermann, H. OSI Reference Model \- The ISO Model of
+Architecture for Open Systems Interconnection.
+\fIIEEE Transactions on Communications\fP. Com-28(4); 425-432.
+April 1980.
diff --git a/share/doc/smm/18.net/spell.ok b/share/doc/smm/18.net/spell.ok
new file mode 100644
index 0000000..f9a387b
--- /dev/null
+++ b/share/doc/smm/18.net/spell.ok
@@ -0,0 +1,307 @@
+A,1986A
+AA
+ACCEPTCONN
+ADDR
+ARPANET
+ASYNC
+BBN
+BBN78
+Beranek
+Boggs
+Boggs78
+Boggs79
+Burruss81
+CANTRCVMORE
+CANTSENDMORE
+CLSIZE
+COLL
+CONNECT2
+CONNREQUIRED
+COPYALL
+CSL
+Catenet
+Cerf
+Cerf78
+Chesson
+Clark82
+Com
+DEC80
+DECnet
+DEQUEUE
+DEQUEUEIF
+DETATCH
+DMA
+DMA'ing
+DMR
+DONTWAIT
+Datagram
+Datakit
+EGP
+EWOULDBLOCK
+Ethernet
+FADDR
+FASTTIMO
+Fabry
+GETOPT
+Gurwitz
+Gurwitz's
+Gurwitz81
+HASCL
+HOSTDEAD
+HOSTUNREACH
+ICMP
+IEN
+IFDOWN
+IFF
+IFRW
+INTRANS
+IP
+IP's
+ISCONNECTED
+ISCONNECTING
+ISDISCONNECTING
+ISO
+ISO81
+Ircb
+Joy86
+K179A
+Karels
+LADDR
+LOOPBACK
+Leffler
+Leffler84
+M.J
+MAXNUBAMR
+MCLGET
+MFREE
+MGET
+MLEN
+MSG
+MSGSIZE
+MSIZE
+Mbufs
+McKusick
+Metcalfe
+NBIO
+NEEDFRAG
+NOARP
+NOFDREF
+NOTRAILERS
+NS
+Notes''SMM:15
+OOBINLINE
+OSI
+PARAMPROB
+PEERADDR
+PF
+POINTOPOINT
+PRC
+PRCO
+PRIV
+PROTORCV
+PROTOSEND
+PRU
+PS1:6
+Postel
+Postel80
+Postel81a
+Postel81b
+Postel81c
+QFULL
+RCVATMARK
+RCVD
+RCVOOB
+RFC
+RH
+ROUTEDEAD
+RTF
+S.J
+SB
+SEL
+SENDOOB
+SETOPT
+SIGIO
+SIOCADDRT
+SIOCDELRT
+SIOCGIFCONF
+SIOCSIFADDR
+SIOCSIFDSTADDR
+SIOCSIFFLAGS
+SIOGSIFFLAGS
+SLOWTIMO
+SMM:15
+SOCKADDR
+SRCFAIL
+SS
+Shoch
+TCP
+TIMXCEED
+TOSHOST
+TOSNET
+UDP
+UNIBUS
+VAX
+VMS
+Vol
+WANTRCVD
+Xerox81
+Xerox82
+Zimmermann
+Zimmermann80
+addr
+addrlist
+adj
+amelioriates
+async
+bdp
+broadaddr
+caddr
+csma
+ctlinput
+ctloutput
+daemon
+dat
+datagram
+decapsulation
+dequeuing
+dev
+dma'd
+dom
+dst
+dstaddr
+dtom
+faddr
+fastimo
+fasttimo
+fcntl
+freem
+fstat
+getsockopt
+hardwired
+hiwat
+hlen
+ierrors
+ifa
+ifaddr
+iff
+ifnet
+ifp
+ifq
+ifqueue
+ifr
+ifrw
+ifrw.ifrw
+ifu
+ifu.ifu
+ifuba
+ifubinfo
+ifw
+ifx
+ifxmt
+info
+info.iff
+init
+inp
+inpcb
+inq
+ip
+ipackets
+ipintrq
+laddr
+len
+loopback
+lowat
+m,n
+mb
+mbcnt
+mbmax
+mbuf
+mbuf's
+mbufs
+mp
+mr
+mtod
+mtu
+netstat
+nmr
+nr
+nx
+oerrors
+off0
+ontrol
+oob
+oobmark
+op
+opackets
+ops
+optname
+pcb
+pgrp
+prev
+proc
+proto
+protosw
+protoswNPROTOSW
+pte
+pullup
+q0len
+qlen
+qlimit
+rawcb
+rcb
+rcv
+recvmsg
+ref
+refcnt
+regs
+req
+rerouted
+ro
+rotocol
+rt
+rtalloc
+rtentry
+rtfree
+rtredirect
+rubaget
+sb
+sel
+sendmsg
+setsockopt
+slowtimo
+snd
+sockaddr
+sockbuf
+socketpair
+sockproto
+sonewconn
+soshutdown
+src
+ta
+ternet
+timeo
+totlen
+uba
+ubaalloc
+ubaget
+ubainit
+uballoc's
+ubaminit
+uban
+ubaput
+ubinfo
+udp
+unibus
+usrreq
+virt
+vm
+wildcard
+wmap
+wubaput
+x,t
+xmt
+xmt.ifrw
+xmt.ifw
+xswapd
+xtofree
+xxx
diff --git a/share/doc/smm/Makefile b/share/doc/smm/Makefile
new file mode 100644
index 0000000..2e22765
--- /dev/null
+++ b/share/doc/smm/Makefile
@@ -0,0 +1,28 @@
+# @(#)Makefile 8.1 (Berkeley) 6/10/93
+# Top-level makefile for the System Manager's Manual (SMM) documents.
+# The following modules do not build/install:
+# 10.named, 13.amd
+# FILES are installed into BINDIR; SUBDIR lists the document subdirectories.
+BINDIR= /usr/share/doc/smm
+FILES= 00.contents Makefile Title
+SUBDIR= 01.setup 02.config 03.fsck 04.quotas 05.fastfs 06.nfs 07.lpd \
+ 08.sendmailop 09.sendmail 11.timedop 12.timed 14.uucpimpl \
+ 15.uucpnet 16.security 17.password 18.net
+# Format the title page (Title) with groff, writing the output to the target.
+Title.ps: ${FILES}
+ groff Title > ${.TARGET}
+# Format the table of contents (00.contents) with groff and the -ms macros.
+contents.ps: ${FILES}
+ groff -ms 00.contents > ${.TARGET}
+# NOTE(review): 01.setup is already in SUBDIR above, so on tahoe and vax it ends up listed twice -- confirm intent.
+.if ${MACHINE} == "tahoe"
+SUBDIR+=01.setup
+.elif ${MACHINE} == "vax"
+SUBDIR+=01.setup
+.endif
+# Copy FILES into ${DESTDIR}${BINDIR} with mode 444, owned by BINOWN:BINGRP.
+beforeinstall:
+ install -c -o ${BINOWN} -g ${BINGRP} -m 444 ${FILES} \
+ ${DESTDIR}${BINDIR}
+
+.include <bsd.subdir.mk>
diff --git a/share/doc/smm/Title b/share/doc/smm/Title
new file mode 100644
index 0000000..4dd1b89
--- /dev/null
+++ b/share/doc/smm/Title
@@ -0,0 +1,203 @@
+.\" Copyright (c) 1986, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)Title 8.2 (Berkeley) 4/19/94
+.\"
+.ps 18
+.vs 22
+.sp 2.75i
+.ft B
+.ce 2
+UNIX System Manager's Manual
+(SMM)
+.ps 14
+.vs 16
+.sp |4i
+.ce 2
+4.4 Berkeley Software Distribution
+.sp |5.75i
+.ft R
+.ps 12
+.vs 16
+.ce
+June, 1993
+.sp |8.2i
+.ce 5
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California
+Berkeley, California 94720
+.bp
+\&
+.sp |1i
+.hy 0
+.ps 10
+.vs 12p
+Copyright 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993
+The Regents of the University of California. All rights reserved.
+.sp 2
+Other than the specific manual pages and documents listed below
+as copyrighted by AT&T,
+redistribution and use of this manual in source and binary forms,
+with or without modification, are permitted provided that the
+following conditions are met:
+.sp 0.5
+.in +0.2i
+.ta 0.2i
+.ti -0.2i
+1) Redistributions of this manual must retain the copyright
+notices on this page, this list of conditions and the following disclaimer.
+.ti -0.2i
+2) Software or documentation that incorporates part of this manual must
+reproduce the copyright notices on this page, this list of conditions and
+the following disclaimer in the documentation and/or other materials
+provided with the distribution.
+.ti -0.2i
+3) All advertising materials mentioning features or use of this software
+must display the following acknowledgement:
+``This product includes software developed by the University of
+California, Berkeley and its contributors.''
+.ti -0.2i
+4) Neither the name of the University nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+.in -0.2i
+.sp
+\fB\s-1THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.\s+1\fP
+.sp 2
+The Institute of Electrical and Electronics Engineers and the American
+National Standards Committee X3, on Information Processing Systems have
+given us permission to reprint portions of their documentation.
+.sp
+In the following statement, the phrase ``this text'' refers to portions
+of the system documentation.
+.sp 0.5
+``Portions of this text are reprinted and reproduced in
+electronic form in 4.4BSD from IEEE Std 1003.1-1988, IEEE
+Standard Portable Operating System Interface for Computer Environments
+(POSIX), copyright 1988 by the Institute of Electrical and Electronics
+Engineers, Inc. In the event of any discrepancy between these versions
+and the original IEEE Standard, the original IEEE Standard is the referee
+document.''
+.sp
+In the following statement, the phrase ``This material'' refers to portions
+of the system documentation.
+.sp 0.5
+``This material is reproduced with permission from American National
+Standards Committee X3, on Information Processing Systems. Computer and
+Business Equipment Manufacturers Association (CBEMA), 311 First St., NW,
+Suite 500, Washington, DC 20001-2178. The developmental work of
+Programming Language C was completed by the X3J11 Technical Committee.''
+.sp 2
+Manual pages cron.8, icheck.8, ncheck.8, and sa.8
+and documents SMM:15, 16, and 17
+are copyright 1979, AT&T Bell Laboratories, Incorporated.
+Document SMM:14 is a modification of an earlier document that
+is copyrighted 1979 by AT&T Bell Laboratories, Incorporated.
+Holders of \x'-1p'UNIX\v'-4p'\s-3TM\s0\v'4p'/32V,
+System III, or System V software licenses are
+permitted to copy these documents, or any portion of them,
+as necessary for licensed use of the software,
+provided this copyright notice and statement of permission
+are included.
+.sp 2
+The views and conclusions contained in this manual are those of the
+authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of the University of California.
+.br
+.ll 6.5i
+.lt 6.5i
+.po .75i
+.in 0i
+.af % i
+.ds ET\"
+.de HD
+.po 0
+.lt 7.4i
+.tl ''''
+.lt
+.po
+'sp 18p
+.if o .tl '\\*(ET''\\*(OT'
+.if e .tl '\\*(OT''\\*(ET'
+'sp 18p
+.ns
+..
+.de FO
+'sp 18p
+.if e .tl '\s9\\*(Dt''\\*(Ed\s0'
+.if o .tl '\s9\\*(Ed''\\*(Dt\s0'
+'bp
+..
+.wh 0 HD
+.wh -60p FO
+.bp 5
+.ds ET \s9\f2Table \|of \|Contents\fP\s0
+.ds OT - % -
+.ce
+\f3TABLE \|OF \|CONTENTS\fP
+.nr x .5i
+.in +\nxu
+.nf
+.ta \n(.lu-\nxuR
+.de xx
+\\$1\f3 \a \fP\\$2
+..
+.de t
+.sp 1v
+.ne .5i
+.cs 3
+.ti -.5i
+.ss 18
+\f3\s9\\$2. \\$3\s0\fP
+.ss 12
+.if t .sp .5v
+.cs 3 36
+.ds Ed Section \\$2
+.ds Dt \\$3
+.so \\$1
+..
+.t /usr/src/share/man/man0/toc8 8 "System Maintenance"
+.in -.5i
+.cs 3
+.if n .ta 8n 16n 24n 32n 40n 48n 56n 64n 72n 80n
+.if t .ta .5i 1i 1.5i 2i 2.5i 3i 3.5i 4i 4.5i 5i 5.5i 6i 6.5i
diff --git a/share/doc/usd/00.contents b/share/doc/usd/00.contents
new file mode 100644
index 0000000..eae0ba8
--- /dev/null
+++ b/share/doc/usd/00.contents
@@ -0,0 +1,262 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)00.contents 8.2 (Berkeley) 4/20/94
+.\"
+.de ND
+.KE
+.sp
+.KS
+..
+.OH '''USD Contents'
+.EH 'USD Contents'''
+.TL
+UNIX User's Supplementary Documents (USD)
+.sp
+\s-24.4 Berkeley Software Distribution\s+2
+.sp
+\fRJune, 1993\fR
+.PP
+This volume contains documents which supplement the manual pages in
+.I
+The Unix User's Reference Manual
+.R
+for the 4.4BSD system as distributed by U.C. Berkeley.
+.sp
+.KS
+.SH
+Getting Started
+.ND
+.IP
+.tl 'Unix for Beginners \- Second Edition''USD:1'
+.QP
+An introduction to the most basic uses of the system.
+.ND
+.IP
+.tl 'Learn \- Computer\-Aided Instruction on UNIX (Second Edition)''USD:2'
+.QP
+Describes a computer-aided instruction program that walks new users through
+the basics of files, the editor, and document preparation software.
+.ND
+.SH
+Basic Utilities
+.ND
+.IP
+.tl 'An Introduction to the UNIX Shell''USD:3'
+.QP
+Steve Bourne's introduction to the capabilities of
+.I sh,
+a command interpreter especially popular for writing shell scripts.
+.ND
+.IP
+.tl 'An Introduction to the C shell''USD:4'
+.QP
+This introduction to
+.I csh,
+(a command interpreter popular for interactive work) describes many
+commonly used UNIX commands, assumes little prior knowledge of UNIX,
+and has a glossary useful for beginners.
+.ND
+.IP
+.tl 'DC \- An Interactive Desk Calculator''USD:5'
+.QP
+A super HP calculator, if you do not need floating point.
+.ND
+.IP
+.tl 'BC \- An Arbitrary Precision Desk-Calculator Language''USD:6'
+.QP
+A front end for DC that provides infix notation, control flow, and
+built\-in functions.
+.ND
+.SH
+Communicating with the World
+.ND
+.IP
+.tl 'Mail Reference Manual''USD:7'
+.QP
+Complete details on one of the programs for sending and reading your mail.
+.ND
+.IP
+.tl 'The Rand MH Message Handling System''USD:8'
+.QP
+This system for managing your computer mail uses lots of small programs,
+instead of one large one.
+.ND
+.SH
+Text Editing
+.ND
+.IP
+.tl 'A Tutorial Introduction to the Unix Text Editor''USD:9'
+.QP
+An easy way to get started with the line editor,
+.I ed.
+.ND
+.IP
+.tl 'Advanced Editing on Unix''USD:10'
+.QP
+The next step.
+.ND
+.IP
+.tl 'An Introduction to Display Editing with Vi''USD:11'
+.QP
+The document to learn to use the \fIvi\fR screen editor.
+.ND
+.IP
+.tl 'Ex Reference Manual (Version 3.7)''USD:12'
+.QP
+The final reference for the \fIex\fR editor.
+.ND
+.IP
+.tl 'Vi Reference Manual''USD:13'
+.QP
+The definitive reference for the \fInvi\fR editor.
+.ND
+.IP
+.tl 'Jove Manual for UNIX Users''USD:14'
+.QP
+Jove is a small, self-documenting, customizable display editor, based on
+EMACS. A plausible alternative to
+.I vi.
+.ND
+.IP
+.tl 'SED \- A Non-interactive Text Editor''USD:15'
+.QP
+Describes a one-pass variant of
+.I ed
+useful as a filter for processing large files.
+.ND
+.IP
+.tl 'AWK \- A Pattern Scanning and Processing Language (Second Edition)''USD:16'
+.QP
+A program for data selection and transformation.
+.ND
+.SH
+Document Preparation
+.ND
+.IP
+.tl 'Typing Documents on UNIX: Using the \-ms Macros with Troff and Nroff''USD:17'
+.QP
+Describes and gives examples of the basic use of the typesetting tools and
+``-ms'', a frequently used package of formatting requests that make it easier
+to lay out most documents.
+.ND
+.IP
+.tl 'A Revised Version of \-ms''USD:18'
+.QP
+A brief description of the Berkeley revisions made to the \-ms formatting
+macros for nroff and troff.
+.ND
+.IP
+.tl 'Writing Papers with \fInroff\fR using \-me''USD:19'
+.QP
+Another popular macro package for
+.I nroff.
+.ND
+.IP
+.tl '\-me Reference Manual''USD:20'
+.QP
+The final word on \-me.
+.ND
+.IP
+.tl 'NROFF/TROFF User\'s Manual''USD:21'
+.QP
+Extremely detailed information about these document formatting programs.
+.ND
+.IP
+.tl 'A TROFF Tutorial''USD:22'
+.QP
+An introduction to the most basic uses of
+.I troff
+for those who really want to know such things, or want to write their
+own macros.
+.ND
+.IP
+.tl 'A System for Typesetting Mathematics''USD:23'
+.QP
+Describes
+.I eqn,
+an easy-to-learn language for high-quality mathematical typesetting.
+.ND
+.IP
+.tl 'Typesetting Mathematics \- User\'s Guide (Second Edition)''USD:24'
+.QP
+More details about how to use
+.I eqn.
+.ND
+.IP
+.tl 'Tbl \- A Program to Format Tables''USD:25'
+.QP
+A program for easily typesetting tabular material.
+.ND
+.IP
+.tl 'Refer \- A Bibliography System''USD:26'
+.QP
+An introduction to one set of tools used to maintain bibliographic databases.
+The major program,
+.I refer,
+is used to automatically retrieve and format the references
+based on document citations.
+.ND
+.IP
+.tl 'Some Applications of Inverted Indexes on the UNIX System''USD:27'
+.QP
+Mike Lesk's paper describes the
+.I refer
+programs in a somewhat larger context.
+.ND
+.IP
+.tl 'BIB \- A Program for Formatting Bibliographies''USD:28'
+.QP
+This is an alternative to
+.I refer
+for expanding citations in documents.
+.ND
+.IP
+.tl 'Writing Tools \- The STYLE and DICTION Programs''USD:29'
+.QP
+These are programs which can help you understand and improve your
+writing style.
+.ND
+.SH
+Amusements
+.ND
+.IP
+.tl 'A Guide to the Dungeons of Doom''USD:30'
+.QP
+An introduction to the popular game of \fIrogue\fP, a fantasy game
+which is one of the biggest known users of VAX cycles.
+.ND
+.IP
+.tl 'Star Trek''USD:31'
+.QP
+You are the Captain of the Starship Enterprise. Wipe out the
+Klingons and save the Federation.
+.KE
diff --git a/share/doc/usd/18.msdiffs/Makefile b/share/doc/usd/18.msdiffs/Makefile
new file mode 100644
index 0000000..c6353e0
--- /dev/null
+++ b/share/doc/usd/18.msdiffs/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= usd/18.msdiffs
+SRCS= ms.diffs
+MACROS= -msU
+
+.include <bsd.doc.mk>
diff --git a/share/doc/usd/18.msdiffs/ms.diffs b/share/doc/usd/18.msdiffs/ms.diffs
new file mode 100644
index 0000000..1193f6f
--- /dev/null
+++ b/share/doc/usd/18.msdiffs/ms.diffs
@@ -0,0 +1,287 @@
+.\" Copyright (c) 1983, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)ms.diffs 8.1 (Berkeley) 6/8/93
+.\"
+.nr LL 6.5i
+.nr FL 6.0i
+.if t .nr PD .5v
+.if t .ds m \u\(ul\dm
+.if n .ds m -m
+.AM
+.OH 'A Revised Version of \*ms''USD:18-%'
+.EH 'USD:18-%''A Revised Version of \*ms'
+.TL
+A Revised Version of \*ms
+.AU
+Bill Tuthill
+.AI
+Computing Services
+University of California
+Berkeley, CA 94720
+.PP
+The \*ms macros have been slightly revised and re\%arranged for the
+Berkeley Unix distribution.
+Because of the rearrangement,
+the new macros can be read by the computer
+in about half the time required by the previous version of \*ms.
+This means that output will begin to appear between ten seconds
+and several minutes more quickly, depending on the system load.
+On long files, however, the savings in total time are not substantial.
+The old version of \*ms is still available as \*mos.
+.PP
+Several bugs in \*ms have been fixed, including
+a bad problem with the .1C macro,
+minor difficulties with boxed text,
+a break induced by .EQ before initialization,
+the failure to set tab stops in displays,
+and several bothersome errors in the \fBrefer\fP macros.
+Macros used only at Bell Laboratories have been removed.
+There are a few extensions to previous \*ms macros,
+and a number of new macros, but all the documented \*ms macros
+still work exactly as they did before, and have the same names as before.
+Output produced with \*ms should look like output produced with \*mos.
+.PP
+One important new feature is automatically numbered footnotes.
+Footnote numbers are printed by means of a pre-defined string
+(\e\(**\(**), which you invoke separately from .FS and .FE.
+Each time it is used, this string increases the footnote number by one,
+whether or not you use .FS and .FE in your text.
+Footnote numbers will be superscripted on the phototypesetter
+and on daisy-wheel terminals, but on low-resolution devices
+(such as the lpr and a crt), they will be bracketed.
+If you use \e\(**\(** to indicate numbered footnotes,
+then the .FS macro will automatically include
+the footnote number at the bottom of the page.
+This footnote, for example, was produced as follows:\**
+.DS
+This footnote, for example, was produced as follows:\e\(**\(**
+\&.FS
+.sp -.2
+ ...
+\&.FE
+.DE
+.FS
+If you never use the ``\e\(**\(**'' string,
+no footnote numbers will appear anywhere in the text,
+including down here.
+The output footnotes will look exactly like
+footnotes produced with \*mos.
+.FE
+If you are using \e\(**\(** to number footnotes,
+but want a particular footnote to be marked with an asterisk or a dagger,
+then give that mark as the first argument to .FS: \(dg
+.DS
+then give that mark as the first argument to .FS: \e(dg
+\&.FS \e(dg
+.sp -.2
+ ...
+\&.FE
+.DE
+.FS \(dg
+In the footnote, the dagger will appear where the footnote
+number would otherwise appear, as on the left.
+.FE
+Footnote numbering will be temporarily suspended,
+because the \e\(**\(** string is not used.
+Instead of a dagger, you could use an asterisk *
+or double dagger \(dd, represented as \|\e(dd.
+.PP
+Another new feature is a macro for printing theses
+according to Berkeley standards.
+This macro is called .TM, which stands for thesis mode.
+(It is much like the .th macro in \*me.)
+It will put page numbers in the upper right-hand corner;
+number the first page; suppress the date;
+and doublespace everything except quotes, displays, and keeps.
+Use it at the top of each file making up your thesis.
+Calling .TM defines the .CT macro for chapter titles,
+which skips to a new page and moves the page number to the center footer.
+The .P1 (P one) macro can be used even without thesis mode
+to print the header on page 1,
+which is suppressed except in thesis mode.
+If you want roman numeral page numbering,
+use an ``.af\0PN\0i'' request.
+.PP
+There is a new macro especially for bibliography entries,
+called .XP, which stands for exdented paragraph.
+It will exdent the first line of the paragraph by \en(PI units,
+usually 5n (the same as the indent for the first line of a .PP).
+Most bibliographies are printed this way.
+Here are some examples of exdented paragraphs:
+.XP
+Lumley, Lyle S., \fISex in Crustaceans: Shell Fish Habits,\fP\|
+Harbinger Press, Tampa Bay and San Diego, October 1979.
+243 pages.
+The pioneering work in this field.
+.XP
+Leffadinger, Harry A., ``Mollusk Mating Season: 52 Weeks, or All Year?''
+in \fIActa Biologica,\fP\| vol. 42, no. 11, November 1980.
+A provocative thesis, but the conclusions are wrong.
+.LP
+Of course, you will have to take care of
+italicizing the book title and journal,
+and quoting the title of the journal article.
+Indentation or exdentation can be changed
+by setting the value of number register PI.
+.PP
+If you need to produce endnotes rather than footnotes,
+put the references in a file of their own.
+This is similar to what you would do if you were
+typing the paper on a conventional typewriter.
+Note that you can use automatic footnote numbering
+without actually having .FS and .FE pairs in your text.
+If you place footnotes in a separate file,
+you can use .IP macros with \e\(**\(**\| as a hanging tag;
+this will give you numbers at the left-hand margin.
+With some styles of endnotes,
+you would want to use .PP rather than .IP macros,
+and specify \e\(**\(** before the reference begins.
+.PP
+There are four new macros to help produce a table of contents.
+Table of contents entries must be enclosed in .XS and .XE pairs,
+with optional .XA macros for additional entries;
+arguments to .XS and .XA specify the page number,
+to be printed at the right.
+A final .PX macro prints out the table of contents.
+Here is a sample of typical input and output text:
+.DS
+\&.XS ii
+Introduction
+\&.XA 1
+Chapter 1: Review of the Literature
+\&.XA 23
+Chapter 2: Experimental Evidence
+\&.XE
+\&.PX
+.sp .5
+.lt 5.5i
+.tl ''\fBTable of Contents\fP''
+.ta 5i 5.5iR
+.sp
+Introduction  ii\|
+Chapter 1: Review of the Literature  1
+Chapter 2: Experimental Evidence  23
+.sp .5
+.DE
+The .XS and .XE pairs may also be used in the text,
+after a section header for instance,
+in which case page numbers are supplied automatically.
+However, most documents that require a table of contents
+are too long to produce in one run,
+which is necessary if this method is to work.
+It is recommended that you do a table of contents
+after finishing your document.
+To print out the table of contents, use the .PX macro;
+if you forget it, nothing will happen.
+.PP
+As an aid in producing text that will format correctly
+with both \fBnroff\fP and \fBtroff\fP,
+there are some new string definitions that define quotation marks
+and dashes for each of these two formatting programs.
+The \e\(**\^\u_\d string will yield two hyphens in \fBnroff\fP,
+but in \fBtroff\fP it will produce an em dash\*-
+like this one.
+The \e\(**Q and \e\(**U strings will produce
+`` and '' in \fBtroff\fP, but " in \fBnroff\fP.
+(In typesetting, the double quote is traditionally considered bad form.)
+.PP
+There are now a large number of optional
+foreign accent marks defined by the \*ms macros.
+All the accent marks available in \*mos are present,
+and they all work just as they always did.
+However, there are better definitions available
+by placing .AM at the beginning of your document.
+Unlike the \*mos accent marks,
+the accent strings should come \fIafter\fP\| the letter being accented.
+Here is a list of the diacritical marks,
+with examples of what they look like.
+.DS
+.ta 2i 3i
+name of accent input output
+\l'3.5i'
+acute accent e\e\(**\' e\*'
+grave accent e\e\(**\` e\*`
+circumflex o\e\(**\d^\u o\*^
+cedilla c\e\(**, c\*,
+tilde n\e\(**\d~\u n\*~
+question \e\(**? \*?
+exclamation \e\(**! \*!
+umlaut u\e\(**: u\*:
+digraph s \e\(**8 \*8
+hac\*vek c\e\(**v c\*v
+macron a\e\(**_ a\*_
+underdot s\e\(**. s\*.
+o-slash o\e\(**/ o\*/
+angstrom a\e\(**o a\*o
+yogh kni\e\(**3t kni\*3t
+Thorn \e\(**(Th \*(Th
+thorn \e\(**(th \*(th
+Eth \e\(**(D- \*(D-
+eth \e\(**(d- \*(d-
+hooked o \e\(**q \*q
+ae ligature \e\(**(ae \*(ae
+AE ligature \e\(**(Ae \*(Ae
+oe ligature \e\(**(oe \*(oe
+OE ligature \e\(**(Oe \*(Oe
+.DE
+If you want to use these new diacritical marks,
+don't forget the .AM at the top of your file.
+Without it, some will not print at all,
+and others will be placed on the wrong letter.
+.PP
+It is also possible to produce custom headers and footers
+that are different on even and odd pages.
+The .OH and .EH macros define odd and even headers,
+while .OF and .EF define odd and even footers.
+Arguments to these four macros are specified as with .tl.
+This document was produced with:
+.DS
+\&.OH \'\ef\^IThe -mx Macros\'\'Page %\ef\^P\'
+\&.EH \'\ef\^IPage %\'\'The -mx Macros\ef\^P\'
+.DE
+Note that it would be an error to have an apostrophe in the header text;
+if you need one, you will have to use a different delimiter
+around the left, center, and right portions of the title.
+You can use any character as a delimiter, provided it doesn't appear
+elsewhere in the argument to .OH, .EH, .OF, or .EF.
+.PP
+The \*ms macros work in conjunction with
+the \fBtbl\fR, \fBeqn\fR, and \fBrefer\fR preprocessors.
+Macros to deal with these items are read in only as needed,
+as are the thesis macros (.TM),
+the special accent mark definitions (.AM),
+table of contents macros (.XS and .XE),
+and macros to format the optional cover page.
+The code for the \*ms package lives in /usr/lib/tmac/tmac.s,
+and sourced files reside in the directory /usr/ucb/lib/ms.
+.sp
+.tl '''\*(DY'
diff --git a/share/doc/usd/19.memacros/Makefile b/share/doc/usd/19.memacros/Makefile
new file mode 100644
index 0000000..95d8324
--- /dev/null
+++ b/share/doc/usd/19.memacros/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= usd/19.memacros
+SRCS= intro.me
+MACROS= -me
+
+.include <bsd.doc.mk>
diff --git a/share/doc/usd/19.memacros/intro.me b/share/doc/usd/19.memacros/intro.me
new file mode 100644
index 0000000..2d8046a
--- /dev/null
+++ b/share/doc/usd/19.memacros/intro.me
@@ -0,0 +1,2347 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)intro.me 8.1 (Berkeley) 6/8/93
+.\"
+.UC 7
+.ll 6.5i
+.lt 6.5i
+.pn 0
+.ds MO 2.27\" version of -me to which this applies
+.nr si 3n
+\".he 'USING NROFF AND \-ME''%'
+.eh 'USD:19-%''Writing Papers with NROFF using \-me'
+.oh 'Writing Papers with NROFF using \-me''USD:19-%'
+.ds U \s-1UNIX\s0
+.ds N \s-1NROFF\s0
+.ds T \s-1TROFF\s0
+.+c
+.(l C
+.sz 14
+.b "Writing Papers with NROFF using \-me"
+.sz
+.sp 2
+.ul
+Eric P. Allman*
+.(f
+*Author's current address:
+Computer Science Division,
+EECS,
+University of California,
+Berkeley, California 94720.
+.)f
+.sp
+Project INGRES
+Electronics Research Laboratory
+University of California, Berkeley
+Berkeley, California 94720
+.)l
+.sp 4
+.pp
+This document describes
+the text processing facilities
+available on the \*U\(dg
+.(f
+\(dg\*U is a trademark
+of AT&T Bell Laboratories.
+.)f
+operating system
+via \*N and the
+\-me
+macro package.
+It is assumed
+that the reader
+already is generally familiar
+with the \*U operating system
+and a text editor
+such as
+.b ex .
+This is intended to be a casual introduction,
+and
+as such not all material is covered.
+In particular,
+many variations and additional features
+of the \-me macro package
+are not explained.
+For a complete discussion of this
+and other issues,
+see
+.ul
+The \-me Reference Manual
+and
+.ul
+The \*N/\*T Reference Manual.
+.pp
+\*N, a computer program
+that runs on the \*U operating system,
+reads an input file
+prepared by the user
+and outputs a formatted paper
+suitable for publication or framing.
+The input consists of
+.i text ,
+or words to be printed,
+and
+.i requests ,
+which give instructions
+to the \*N program
+telling how to format the printed copy.
+.pp
+Section 1
+describes the basics
+of text processing.
+Section 2
+describes the basic requests.
+Section 3
+introduces displays.
+Annotations,
+such as footnotes,
+are handled in
+section 4.
+The more complex requests
+which are not discussed in section 2
+are covered in section 5.
+Finally,
+section 6
+discusses things you will need
+to know
+if you want to typeset documents.
+If you are a novice,
+you probably won't want to read beyond section 4
+until you have tried some of the basic features out.
+.pp
+When you have your raw text ready,
+call the \*N formatter by typing
+as a request to the \*U shell:
+.(b
+nroff \-me \-T\c
+.i "type files"
+.)b
+where
+.i type
+describes the type of terminal
+you are outputting to.
+Common values are
+.b dtc
+for a DTC 300s
+(daisy-wheel type)
+printer and
+.b lpr
+for the line printer.
+If the
+.b \-T
+flag is omitted,
+a
+.q "lowest common denominator"
+terminal is assumed;
+this is good for previewing output
+on most terminals.
+A complete description of options
+to the \*N command can be found in
+.ul
+The \*N/\*T Reference Manual.
+.pp
+The word
+.i argument
+is used in this manual
+to mean a word or number
+which appears on the same line
+as a request
+which modifies the meaning
+of that request.
+For example,
+the request
+.(b
+\&.sp
+.)b
+spaces one line,
+but
+.(b
+\&.sp 4
+.)b
+spaces four lines.
+The number
+.b 4
+is an
+.i argument
+to the
+.b .sp
+request
+which says to space four lines
+instead of one.
+Arguments are separated from the request
+and from each other
+by spaces.
+.sh 1 "Basics of Text Processing"
+.pp
+The primary function
+of \*N
+is to
+.i collect
+words from input lines,
+.i fill
+output lines with those words,
+.i justify
+the right hand margin by inserting extra spaces
+in the line,
+and output the result.
+For example,
+the input:
+.(b
+Now is the time
+for all good men
+to come to the aid
+of their party.
+Four score and seven
+years ago,...
+.)b
+will be read,
+packed onto output lines,
+and justified
+to produce:
+.(b F
+Now is the time
+for all good men
+to come to the aid
+of their party.
+Four score and seven
+years ago,...
+.)b
+Sometimes you may want to start a new output line
+even though the line you are on
+is not yet full;
+for example,
+at the end of a paragraph.
+To do this
+you can cause a
+.i break ,
+which
+starts a new output line.
+Some requests
+cause a break automatically,
+as do blank input lines
+and input lines beginning with a space.
+.pp
+Not all input lines
+are text to be formatted.
+Some of the input lines
+are
+.i requests
+which describe
+how to format the text.
+Requests always have a period
+or an apostrophe
+(\c
+.q "\|\(aa\|" )
+as the first character
+of the input line.
+.pp
+The text formatter
+also does more complex things,
+such as automatically numbering pages,
+skipping over page folds,
+putting footnotes in the correct place,
+and so forth.
+.pp
+I can offer you a few hints
+for preparing text
+for input to \*N.
+First,
+keep the input lines short.
+Short input lines are easier to edit,
+and \*N will pack words onto longer lines
+for you anyhow.
+In keeping with this,
+it is helpful
+to begin a new line
+after every period,
+comma,
+or phrase,
+since common corrections
+are to add or delete sentences
+or phrases.
+Second,
+do not put spaces at the end of lines,
+since this can sometimes confuse the \*N
+processor.
+Third,
+do not hyphenate words at the end of lines
+(except words that should have hyphens in them,
+such as
+.q mother-in-law );
+\*N is smart enough to hyphenate words
+for you as needed,
+but is not smart enough
+to take hyphens out
+and join a word back together.
+Also,
+words such as
+.q mother-in-law
+should not be broken
+over a line,
+since then you will get a space
+where not wanted,
+such as
+.tr @-
+.nh
+.q "mother@\ in@law" .
+.br
+.tr @@
+.hy 14
+.sh 1 "Basic Requests"
+.sh 2 "Paragraphs"
+.pp
+Paragraphs are begun
+by using the
+.b .pp
+request.
+For example,
+the input:
+.(b
+\&.pp
+Now is the time for all good men
+to come to the aid of their party.
+Four score and seven years ago,...
+.)b
+produces a blank line
+followed by an indented first line.
+The result is:
+.(b F
+.ti +\n(piu
+Now is the time for all good men
+to come to the aid of their party.
+Four score and seven years ago,...
+.)b
+.pp
+Notice that the sentences
+of the paragraphs
+.i "must not"
+begin with a space,
+since blank lines
+and lines beginning with spaces
+cause a break.
+For example,
+if I had typed:
+.(b
+\&.pp
+Now is the time for all good men
+ to come to the aid of their party.
+Four score and seven years ago,...
+.)b
+The output would be:
+.(b F
+.ti +\n(piu
+Now is the time for all good men
+ to come to the aid of their party.
+Four score and seven years ago,...
+.)b
+A new line begins after the word
+.q men
+because the second line began with a space character.
+.pp
+There are many
+fancier
+types of paragraphs,
+which will be described later.
+.sh 2 "Headers and Footers"
+.pp
+Arbitrary headers and footers
+can be put
+at the top and bottom
+of every page.
+Two requests
+of the form
+.b .he \ \c
+.i title
+and
+.b .fo \ \c
+.i title
+define the titles to put at the head and the foot
+of every page,
+respectively.
+The titles are called
+.i three-part
+titles,
+that is,
+there is a left-justified part,
+a centered part,
+and a right-justified part.
+To separate these three parts
+the first character of
+.i title
+(whatever it may be)
+is used as a delimiter.
+Any character may be used,
+but
+backslash
+and double quote marks
+should be avoided.
+The percent sign
+is replaced by the current page number
+whenever found in the title.
+For example,
+the input:
+.(b
+\&.he \(aa\(aa%\(aa\(aa
+\&.fo \(aaJane Jones\(aa\(aaMy Book\(aa
+.)b
+results in the page number
+centered at the top
+of each page,
+.q "Jane Jones"
+in the lower left corner,
+and
+.q "My Book"
+in the lower right corner.
+.sh 2 "Double Spacing"
+.pp
+.ls 2
+\*N will double space output text automatically if you
+use the request
+.b ".ls\ 2" ,
+as is done in this section.
+You can revert to single spaced mode
+by typing
+.b ".ls\ 1" .
+.ls 1
+.sh 2 "Page Layout"
+.pp
+A number of requests allow
+you to change the way the printed copy looks,
+sometimes called the
+.i layout
+of the output page.
+Most of these requests adjust the placing
+of
+.q "white space"
+(blank lines or spaces).
+In these explanations,
+characters in italics
+should be replaced with values you wish to use;
+bold characters
+represent characters which should actually be typed.
+.pp
+The
+.b .bp
+request
+starts a new page.
+.pp
+The request
+.b .sp \ \c
+.i N
+leaves
+.i N
+lines of blank space.
+.i N
+can be omitted
+(meaning skip a single line)
+or can be of the form
+.i N \^\c
+.b i
+(for
+.i N
+inches)
+or
+.i N \^\c
+.b c
+(for
+.i N
+centimeters).
+For example, the input:
+.(b
+\&.sp 1.5i
+My thoughts on the subject
+\&.sp
+.)b
+leaves one and a half inches of space,
+followed by the line
+.q "My thoughts on the subject" ,
+followed by a single blank line.
+.pp
+The
+.b .in \ \c
+.i +N
+request
+changes the amount of white space
+on the left of the page
+(the
+.i indent ).
+The argument
+.i N
+can be of the form
+.b + \c
+.i N
+(meaning leave
+.i N
+spaces more than you are already leaving),
+.b \- \c
+.i N
+(meaning leave less than you do now),
+or just
+.i N
+(meaning leave exactly
+.i N
+spaces).
+.i N
+can be of the form
+.i N \^\c
+.b i
+or
+.i N \^\c
+.b c
+also.
+For example,
+the input:
+.(b
+initial text
+\&.in 5
+some text
+\&.in +1i
+more text
+\&.in \-2c
+final text
+.)b
+produces
+.q "some text"
+indented exactly five spaces
+from the left margin,
+.q "more text"
+indented five spaces
+plus one inch
+from the left margin
+(fifteen spaces
+on a pica typewriter),
+and
+.q "final text"
+indented five spaces
+plus one inch
+minus two centimeters
+from the margin.
+That is,
+the output is:
+.(b
+initial text
+.in +5
+some text
+.in +1i
+more text
+.in -2c
+final text
+.)b
+.pp
+The
+.b .ti \ \c
+.i +N
+(temporary indent)
+request is used like
+.b .in \ \c
+.i +N
+when the indent
+should apply to one line only,
+after which it should revert
+to the previous indent.
+For example,
+the input:
+.(b
+\&.in 1i
+\&.ti 0
+Ware, James R. The Best of Confucius,
+Halcyon House, 1950.
+An excellent book containing translations of
+most of Confucius\(aa most delightful sayings.
+A definite must for anyone interested in the early foundations
+of Chinese philosophy.
+.)b
+produces:
+.in 1i+\n($iu
+.ti \n($iu
+Ware, James R. The Best of Confucius,
+Halcyon House, 1950.
+An excellent book containing translations of
+most of Confucius' most delightful sayings.
+A definite must for anyone interested in the early foundations
+of Chinese philosophy.
+.pp
+Text lines can be centered
+by using the
+.b .ce
+request.
+The line after the
+.b .ce
+is centered
+(horizontally)
+on the page.
+To center more than one line,
+use
+.b .ce \ \c
+.i N
+(where
+.i N
+is the number of lines to center),
+followed by the
+.i N
+lines.
+If you want to center many lines
+but don't want to count them,
+type:
+.(b
+\&.ce 1000
+lines to center
+\&.ce 0
+.)b
+The
+.b ".ce\ 0"
+request tells \*N to center zero more lines,
+in other words,
+stop centering.
+.pp
+All of these requests
+cause a break;
+that is,
+they always start
+a new line.
+If you want to start a new line
+without performing any other action,
+use
+.b .br .
+.sh 2 "Underlining"
+.pp
+Text can be underlined
+using the
+.b .ul
+request.
+The
+.b .ul
+request
+causes the next input line
+to be underlined when output.
+You can underline multiple lines
+by stating a count of
+.i input
+lines to underline,
+followed by those lines
+(as with the
+.b .ce
+request).
+For example,
+the input:
+.(b
+\&.ul 2
+Notice that these two input lines
+are underlined.
+.)b
+will underline those eight words in \*N.
+(In \*T they will be set in italics.)
+.sh 1 "Displays"
+.pp
+Displays are sections of text
+to be set off
+from the body of the paper.
+Major quotes,
+tables,
+and figures
+are types of displays,
+as are all the examples
+used in this document.
+All displays
+except centered blocks
+are output
+single spaced.
+.sh 2 "Major Quotes"
+.pp
+Major quotes
+are quotes which are several lines long,
+and hence are set in from the rest
+of the text
+without quote marks
+around them.
+These can be generated
+using the commands
+.b .(q
+and
+.b .)q
+to surround the quote.
+For example,
+the input:
+.(b
+As Weizenbaum points out:
+\&.(q
+It is said that to explain is to explain away.
+This maxim is nowhere so well fulfilled
+as in the areas of computer programming,...
+\&.)q
+.)b
+generates as output:
+.lp
+As Weizenbaum points out:
+.(q
+It is said that to explain is to explain away.
+This maxim is nowhere so well fulfilled
+as in the areas of computer programming,...
+.)q
+.sh 2 "Lists"
+.pp
+A
+.i list
+is an indented,
+single spaced,
+unfilled display.
+Lists should be used
+when the material to be printed
+should not be filled and justified
+like normal text,
+such as columns of figures
+or the examples used in this paper.
+Lists are surrounded
+by the requests
+.b .(l
+and
+.b .)l .
+For example,
+typing:
+.(b
+Alternatives to avoid deadlock are:
+\&.(l
+Lock in a specified order
+Detect deadlock and back out one process
+Lock all resources needed before proceeding
+\&.)l
+.)b
+will produce:
+.br
+Alternatives to avoid deadlock are:
+.(l
+Lock in a specified order
+Detect deadlock and back out one process
+Lock all resources needed before proceeding
+.)l
+.sh 2 "Keeps"
+.pp
+A
+.i keep
+is a display of lines
+which are kept on a single page
+if possible.
+An example of where you would use a keep
+might be a diagram.
+Keeps differ from lists
+in that lists may be broken
+over a page boundary
+whereas keeps will not.
+.pp
+Blocks are the basic kind of keep.
+They begin with the request
+.b .(b
+and end with the request
+.b .)b .
+If there is not room on the current page
+for everything in the block,
+a new page is begun.
+This has the unpleasant effect
+of leaving blank space
+at the bottom of the page.
+When this is not appropriate,
+you can use the alternative,
+called
+.i "floating keeps" .
+.pp
+.i "Floating keeps"
+move relative to the text.
+Hence,
+they are good for things
+which will be referred to
+by name,
+such as
+.q "See figure 3" .
+A floating keep will appear
+at the bottom of the current page
+if it will fit;
+otherwise,
+it will appear at the top
+of the next page.
+Floating keeps begin with the line
+.b .(z
+and end with the line
+.b .)z .
+For an example of a floating keep,
+see figure 1.
+.(z
+.in 1i
+.xl -1i
+.hl
+\&.(z
+\&.hl
+Text of keep to be floated.
+\&.sp
+\&.ce
+Figure 1. Example of a Floating Keep.
+\&.hl
+\&.)z
+.sp
+.ce
+Figure 1. Example of a Floating Keep.
+.hl
+.)z
+The
+.b .hl
+request is used
+to draw a horizontal line
+so that the figure
+stands out from the text.
+.sh 2 "Fancier Displays"
+.pp
+Keeps and lists are normally collected in
+.i nofill
+mode,
+so that they are good for tables and such.
+If you want a display
+in fill mode
+(for text),
+type
+.b ".(l\ F"
+(Throughout this section,
+comments applied to
+.b .(l
+also apply to
+.b .(b
+and
+.b .(z ).
+This kind of display
+will be indented from both margins.
+For example,
+the input:
+.(b
+\&.(l F
+And now boys and girls,
+a newer, bigger, better toy than ever before!
+Be the first on your block to have your own computer!
+Yes kids, you too can have one of these modern
+data processing devices.
+You too can produce beautifully formatted papers
+without even batting an eye!
+\&.)l
+.)b
+will be output as:
+.(b F
+And now boys and girls,
+a newer, bigger, better toy than ever before!
+Be the first on your block to have your own computer!
+Yes kids, you too can have one of these modern
+data processing devices.
+You too can produce beautifully formatted papers
+without even batting an eye!
+.)b
+.pp
+Lists and blocks are also normally indented
+(floating keeps are normally left justified).
+To get a left-justified list,
+type
+.b ".(l\ L" .
+To get a list centered
+line-for-line,
+type
+.b ".(l C" .
+For example,
+to get a filled,
+left justified list, enter:
+.(b
+\&.(l L F
+text of block
+\&.)l
+.)b
+The input:
+.(b
+\&.(l
+first line of unfilled display
+more lines
+\&.)l
+.)b
+produces the indented text:
+.(b
+first line of unfilled display
+more lines
+.)b
+Typing the character
+.b L
+after the
+.b .(l
+request produces the left justified result:
+.(b L
+first line of unfilled display
+more lines
+.)b
+Using
+.b C
+instead of
+.b L
+produces the line-at-a-time centered output:
+.(b C
+first line of unfilled display
+more lines
+.)b
+.pp
+Sometimes it may be
+that you want to center several lines
+as a group,
+rather than centering them
+one line at a time.
+To do this
+use centered blocks,
+which are surrounded by the requests
+.b .(c
+and
+.b .)c .
+All the lines are centered as a unit,
+such that the longest line is centered
+and the rest are
+lined up around that line.
+Notice that lines
+do not move
+relative to each other
+using centered blocks,
+whereas they do
+using the
+.b C
+argument to keeps.
+.pp
+Centered blocks are
+.i not
+keeps,
+and may be used
+in conjunction
+with keeps.
+For example,
+to center a group of lines
+as a unit
+and keep them
+on one page,
+use:
+.(b
+\&.(b L
+\&.(c
+first line of unfilled display
+more lines
+\&.)c
+\&.)b
+.)b
+to produce:
+.(b L
+.(c
+first line of unfilled display
+more lines
+.)c
+.)b
+If the block requests
+(\c
+.b .(b
+and
+.b .)b )
+had been omitted
+the result would have been the same,
+but with no guarantee
+that the lines of the centered block
+would have all been on one page.
+Note the use of the
+.b L
+argument to
+.b .(b ;
+this causes the centered block
+to center within the entire line
+rather than within the line
+minus the indent.
+Also,
+the center requests
+must
+be nested
+.i inside
+the keep requests.
+.sh 1 "Annotations"
+.pp
+There are a number of requests
+to save text
+for later printing.
+.i Footnotes
+are printed at the bottom of the current page.
+.i "Delayed text"
+is intended to be a variant form
+of footnote;
+the text is printed only
+when explicitly called for,
+such as at the end of each chapter.
+.i Indexes
+are a type of delayed text
+having a tag
+(usually the page number)
+attached to each entry
+after a row of dots.
+Indexes are also saved
+until called for explicitly.
+.sh 2 "Footnotes"
+.pp
+Footnotes begin with the request
+.b .(f
+and end with the request
+.b .)f .
+The current footnote number is maintained
+automatically,
+and can be used by typing \e**,
+to produce a footnote number\**.
+.(f
+\**Like this.
+.)f
+The number is automatically incremented
+after every footnote.
+For example,
+the input:
+.(b
+\&.(q
+A man who is not upright
+and at the same time is presumptuous;
+one who is not diligent and at the same time is ignorant;
+one who is untruthful and at the same time is incompetent;
+such men I do not count among acquaintances.\e**
+\&.(f
+\e**James R. Ware,
+\&.ul
+The Best of Confucius,
+Halcyon House, 1950.
+Page 77.
+\&.)f
+\&.)q
+.)b
+generates the result:
+.(q
+A man who is not upright
+and at the same time is presumptuous;
+one who is not diligent and at the same time is ignorant;
+one who is untruthful and at the same time is incompetent;
+such men I do not count among acquaintances.\**
+.(f
+\**James R. Ware,
+.ul
+The Best of Confucius,
+Halcyon House, 1950.
+Page 77.
+.)f
+.)q
+It is important
+that the footnote
+appears
+.i inside
+the quote,
+so that you can be sure
+that the footnote
+will appear
+on the same page
+as the quote.
+.sh 2 "Delayed Text"
+.pp
+Delayed text
+is very similar to a footnote
+except that it is printed
+when called for explicitly.
+This allows a list of
+references to
+appear
+(for example)
+at the end of each chapter,
+as is the convention in some disciplines.
+Use
+.b \e*#
+on delayed text
+instead of
+.b \e**
+as on footnotes.
+.pp
+If you are using delayed text
+as your standard reference mechanism,
+you can still use footnotes,
+except that you may want to reference them
+with special characters*
+.(f
+*Such as an asterisk.
+.)f
+rather than numbers.
+.sh 2 "Indexes"
+.pp
+An
+.q index
+(actually more like a table of contents,
+since the entries are not sorted alphabetically)
+resembles delayed text,
+in that it is saved until called for.
+However,
+each entry has the page number
+(or some other tag)
+appended to the last line
+of the index entry
+after a row of dots.
+.pp
+Index entries begin with the request
+.b .(x
+and end with
+.b .)x .
+The
+.b .)x
+request may have an argument,
+which is the value to print
+as the
+.q "page number" .
+It defaults to the current page number.
+If the page number given is an underscore
+(\c
+.q _ )
+no page number
+or line of dots
+is printed at all.
+To get the line of dots
+without a page number,
+type
+.b ".)x """"" ,
+which specifies an explicitly null page number.
+.pp
+The
+.b .xp
+request prints the index.
+.pp
+For example,
+the input:
+.(b
+\&.(x
+Sealing wax
+\&.)x
+\&.(x
+Cabbages and kings
+\&.)x _
+\&.(x
+Why the sea is boiling hot
+\&.)x 2.5a
+\&.(x
+Whether pigs have wings
+\&.)x ""
+\&.(x
+This is a terribly long index entry, such as might be used
+for a list of illustrations, tables, or figures; I expect it to
+take at least two lines.
+\&.)x
+\&.xp
+.)b
+generates:
+.(x
+Sealing wax
+.)x
+.(x
+Cabbages and kings
+.)x _
+.(x
+Why the sea is boiling hot
+.)x 2.5a
+.(x
+Whether pigs have wings
+.)x ""
+.(x
+This is a terribly long index entry, such as might be used
+for a list of illustrations, tables, or figures; I expect it to
+take at least two lines.
+.)x
+.xp
+.pp
+The
+.b .(x
+request may have a single character
+argument,
+specifying the
+.q name
+of the index;
+the normal index is
+.b x .
+Thus,
+several
+.q indices
+may be maintained simultaneously
+(such as a list of tables, table of contents, etc.).
+.pp
+Notice that the index must be printed
+at the
+.i end
+of the paper,
+rather than at the beginning
+where it will probably appear
+(as a table of contents);
+the pages may have to be physically rearranged
+after printing.
+.sh 1 "Fancier Features"
+.pp
+A large number of fancier requests
+exist,
+notably requests to provide other sorts of paragraphs,
+numbered sections of the form
+.b 1.2.3
+(such as used in this document),
+and multicolumn output.
+.sh 2 "More Paragraphs"
+.pp
+Paragraphs generally start with
+a blank line
+and with the first line
+indented.
+It is possible to get
+left-justified block-style paragraphs
+by using
+.b .lp
+instead of
+.b .pp ,
+as demonstrated by the next paragraph.
+.lp
+Sometimes you want to use paragraphs
+that have the
+.i body
+indented,
+and the first line
+exdented
+(opposite of indented)
+with a label.
+This can be done with the
+.b .ip
+request.
+A word specified on the same line as
+.b .ip
+is printed in the margin,
+and the body is lined up
+at a prespecified position
+(normally five spaces).
+For example,
+the input:
+.(b
+\&.ip one
+This is the first paragraph.
+Notice how the first line
+of the resulting paragraph lines up
+with the other lines in the paragraph.
+\&.ip two
+And here we are at the second paragraph already.
+You may notice that the argument to \c
+.b .ip
+appears
+in the margin.
+\&.lp
+We can continue text...
+.)b
+produces as output:
+.ip one
+This is the first paragraph.
+Notice how the first line of the resulting paragraph lines up
+with the other lines in the paragraph.
+.ip two
+And here we are at the second paragraph already.
+You may notice that the argument to
+.b .ip
+appears
+in the margin.
+.lp
+We can continue text without starting a new indented
+paragraph
+by using the
+.b .lp
+request.
+.pp
+If you have spaces in the label of a
+.b .ip
+request,
+you must use an
+.q "unpaddable space"
+instead of a regular space.
+This is typed as a backslash character
+(\c
+.q \e )
+followed by a space.
+For example,
+to print the label
+.q "Part 1" ,
+enter:
+.(b
+\&.ip "Part\e 1"
+.)b
+.pp
+If a label of an indented paragraph
+(that is, the argument to
+.b .ip )
+is longer than the space allocated for the label,
+.b .ip
+will begin a new line after the label.
+For example,
+the input:
+.(b
+\&.ip longlabel
+This paragraph had a long label.
+The first character of text on the first line
+will not line up with the text on second and subsequent lines,
+although they will line up with each other.
+.)b
+will produce:
+.ip longlabel
+This paragraph had a long label.
+The first character of text on the first line
+will not line up with the text on second and subsequent lines,
+although they will line up with each other.
+.pp
+It is possible to change the size of the label
+by using a second argument
+which is the size of the label.
+For example,
+the above example could be done correctly
+by saying:
+.(b
+\&.ip longlabel 10
+.)b
+which will make the paragraph indent
+10 spaces for this paragraph only.
+If you have many paragraphs to indent
+all the same amount,
+use the
+.i "number register"
+.b ii .
+For example, to leave one inch of space
+for the label,
+type:
+.(b
+\&.nr ii 1i
+.)b
+somewhere before the first call to
+.b .ip .
+Refer to the reference manual
+for more information.
+.pp
+If
+.b .ip
+is used
+with no argument at all
+no hanging tag will be printed.
+For example,
+the input:
+.(b
+\&.ip [a]
+This is the first paragraph of the example.
+We have seen this sort of example before.
+\&.ip
+This paragraph is lined up with the previous paragraph,
+but it has no tag in the margin.
+.)b
+produces as output:
+.ip [a]
+This is the first paragraph of the example.
+We have seen this sort of example before.
+.ip
+This paragraph is lined up with the previous paragraph,
+but it has no tag in the margin.
+.pp
+A special case of
+.b .ip
+is
+.b .np ,
+which automatically
+numbers paragraphs sequentially from 1.
+The numbering is reset at the next
+.b .pp ,
+.b .lp ,
+or
+.b .sh
+(to be described in the next section)
+request.
+For example,
+the input:
+.(b
+\&.np
+This is the first point.
+\&.np
+This is the second point.
+Points are just regular paragraphs
+which are given sequence numbers automatically
+by the .np request.
+\&.pp
+This paragraph will reset numbering by .np.
+\&.np
+For example,
+we have reverted to numbering from one now.
+.)b
+generates:
+.np
+This is the first point.
+.np
+This is the second point.
+Points are just regular paragraphs
+which are given sequence numbers automatically
+by the .np request.
+.pp
+This paragraph will reset numbering by .np.
+.np
+For example,
+we have reverted to numbering from one now.
+.pp
+The
+.b .bu
+request gives lists of this sort that are identified with
+bullets rather than numbers.
+The paragraphs are also crunched together.
+For example,
+the input:
+.(b
+\&.bu
+\&One egg yolk
+\&.bu
+\&One tablespoon cream or top milk
+\&.bu
+\&Salt, cayenne, and lemon juice to taste
+\&.bu
+\&A generous two tablespoonfuls of butter
+.)b
+produces\**:
+.(f
+\**By the way,
+if you put the first three ingredients in a heavy, deep pan
+and whisk the ingredients madly over a medium flame
+(never taking your hand off the handle of the pot)
+until the mixture reaches the consistency of custard
+(just a minute or two),
+then mix in the butter off-heat,
+you will have a wonderful Hollandaise sauce.
+.)f
+.bu
+One egg yolk
+.bu
+One tablespoon cream or top milk
+.bu
+Salt, cayenne, and lemon juice to taste
+.bu
+A generous two tablespoonfuls of butter
+.sh 2 "Section Headings"
+.pp
+Section numbers
+(such as the ones used in this document)
+can be automatically generated
+using the
+.b .sh
+request.
+You must tell
+.b .sh
+the
+.i depth
+of the section number
+and a section title.
+The depth
+specifies how many numbers
+are to appear
+(separated by decimal points)
+in the section number.
+For example,
+the section number
+.b 4.2.5
+has a depth of three.
+.pp
+Section numbers
+are incremented
+in a fairly intuitive fashion.
+If you add a number
+(increase the depth),
+the new number starts out
+at one.
+If you subtract section numbers
+(or keep the same number)
+the final number is incremented.
+For example,
+the input:
+.(b
+\&.sh 1 "The Preprocessor"
+\&.sh 2 "Basic Concepts"
+\&.sh 2 "Control Inputs"
+\&.sh 3
+\&.sh 3
+\&.sh 1 "Code Generation"
+\&.sh 3
+.)b
+produces as output the result:
+.(b
+.b
+1. The Preprocessor
+1.1. Basic Concepts
+1.2. Control Inputs
+1.2.1.
+1.2.2.
+2. Code Generation
+2.1.1.
+.)b
+.pp
+You can specify the section number to begin
+by placing the section number after the section title,
+using spaces instead of dots.
+For example,
+the request:
+.(b
+\&.sh 3 "Another section" 7 3 4
+.)b
+will begin the section numbered
+.b 7.3.4 ;
+all subsequent
+.b .sh
+requests will number relative to this number.
+.pp
+There are more complex features
+which will cause each section to be indented
+proportionally to the depth of the section.
+For example, if you enter:
+.(b
+\&.nr si \c
+.i N
+.)b
+each section will be indented by an amount
+.i N .
+.i N
+must have a scaling factor attached,
+that is, it must be of the form
+.i Nx ,
+where
+.i x
+is a character telling what units
+.i N
+is in.
+Common values for
+.i x
+are
+.b i
+for inches,
+.b c
+for centimeters,
+and
+.b n
+for
+.i ens
+(the width of a single character).
+For example,
+to indent each section
+one-half inch,
+type:
+.(b
+\&.nr si 0.5i
+.)b
+After this,
+sections will be indented by
+one-half inch
+per level of depth in the section number.
+For example,
+this document was produced
+using the request
+.(b
+\&.nr si 3n
+.)b
+at the beginning of the input file,
+giving three spaces of indent
+per section depth.
+.pp
+Section headers without automatically generated numbers
+can be done using:
+.(b
+\&.uh "Title"
+.)b
+which will do a section heading,
+but will put no number on the section.
+.sh 2 "Parts of the Basic Paper"
+.pp
+There are some requests
+which assist in setting up
+papers.
+The
+.b .tp
+request
+initializes for a title page.
+There are no headers or footers
+on a title page,
+and unlike other pages
+you can space down
+and leave blank space
+at the top.
+For example,
+a typical title page might appear as:
+.(b
+\&.tp
+\&.sp 2i
+\&.(l C
+THE GROWTH OF TOENAILS
+IN UPPER PRIMATES
+\&.sp
+by
+\&.sp
+Frank N. Furter
+\&.)l
+\&.bp
+.)b
+.pp
+The request
+.b .th
+sets up the environment
+of the \*N processor
+to do a thesis,
+using the rules established at Berkeley.
+It defines the correct headers and footers
+(a page number in the upper right hand corner only),
+sets the margins correctly,
+and double spaces.
+.pp
+The
+.b .+c \ \c
+.i T
+request can be used
+to start chapters.
+Each chapter is automatically numbered
+from one,
+and a heading is printed at the top of each chapter
+with the chapter number
+and the chapter name
+.i T .
+For example,
+to begin a chapter called
+.q Conclusions ,
+use the request:
+.(b
+\&.+c "CONCLUSIONS"
+.)b
+which will produce,
+on a new page,
+the lines
+.(b C
+CHAPTER 5
+CONCLUSIONS
+.)b
+with appropriate spacing for a thesis.
+Also, the header is moved to the foot of the page
+on the first page of a chapter.
+Although the
+.b .+c
+request was not designed to work only with the
+.b .th
+request,
+it is tuned for the format acceptable
+for a PhD thesis
+at Berkeley.
+.pp
+If the
+title parameter
+.i T
+is omitted from the
+.b .+c
+request,
+the result is a chapter with no heading.
+This can also be used at the beginning
+of a paper;
+for example,
+.b .+c
+was used to generate page one
+of this document.
+.pp
+Although
+papers traditionally have the abstract,
+table of contents,
+and so forth at the front of the paper,
+it is more convenient to format
+and print them last
+when using \*N.
+This is so that index entries
+can be collected and then printed
+for the table of contents
+(or whatever).
+At the end of the paper,
+issue the
+.b ".++ P"
+request,
+which begins the preliminary part
+of the paper.
+After issuing this request,
+the
+.b .+c
+request will begin a preliminary section
+of the paper.
+Most notably,
+this prints the page number
+restarted from one
+in lower case Roman numbers.
+.b .+c
+may be used repeatedly
+to begin different parts of the
+front material;
+for example,
+the abstract,
+the table of contents,
+acknowledgments,
+list of illustrations,
+etc.
+The request
+.b ".++ B"
+may also be used
+to begin the bibliographic section
+at the end of the paper.
+For example,
+the paper might appear
+as outlined in figure 2.
+(In this figure,
+comments begin with the sequence
+.b \e" .)
+.(z
+.hl
+.if t .in 0.5i
+.if t .ta 2i
+.if n .ta 3i
+\&.th \e" set for thesis mode
+\&.fo \(aa\(aaDRAFT\(aa\(aa \e" define footer for each page
+\&.tp \e" begin title page
+\&.(l C \e" center a large block
+THE GROWTH OF TOENAILS
+IN UPPER PRIMATES
+\&.sp
+by
+\&.sp
+Frank Furter
+\&.)l \e" end centered part
+\&.+c INTRODUCTION \e" begin chapter named "INTRODUCTION"
+\&.(x t \e" make an entry into index `t'
+Introduction
+\&.)x \e" end of index entry
+text of chapter one
+\&.+c "NEXT CHAPTER" \e" begin another chapter
+\&.(x t \e" enter into index `t' again
+Next Chapter
+\&.)x
+text of chapter two
+\&.+c CONCLUSIONS
+\&.(x t
+Conclusions
+\&.)x
+text of chapter three
+\&.++ B \e" begin bibliographic information
+\&.+c BIBLIOGRAPHY \e" begin another `chapter'
+\&.(x t
+Bibliography
+\&.)x
+text of bibliography
+\&.++ P \e" begin preliminary material
+\&.+c "TABLE OF CONTENTS"
+\&.xp t \e" print index `t' collected above
+\&.+c PREFACE \e" begin another preliminary section
+text of preface
+.sp 2
+.in 0
+.ce
+Figure 2. Outline of a Sample Paper
+.hl
+.)z
+.sh 2 "Equations and Tables"
+.pp
+Two special \*U programs exist
+to format special types of material.
+.b Eqn
+and
+.b neqn
+set equations
+for the phototypesetter
+and \*N respectively.
+.b Tbl
+arranges to print
+extremely pretty tables
+in a variety of formats.
+This document will only describe
+the embellishments
+to the standard features;
+consult the reference manuals
+for those processors
+for a description of their use.
+.pp
+The
+.b eqn
+and
+.b neqn
+programs are described fully
+in the document
+.ul
+Typesetting Mathematics \- User's Guide
+by Brian W. Kernighan
+and Lorinda L. Cherry.
+Equations are centered,
+and are kept on one page.
+They are introduced by the
+.b .EQ
+request and terminated by the
+.b .EN
+request.
+.pp
+The
+.b .EQ
+request may take an
+equation number as an
+optional argument,
+which is printed vertically centered
+on the right hand side
+of the equation.
+If the equation becomes too long
+it should be split
+between two lines.
+To do this, type:
+.(b
+\&.EQ (eq 34)
+text of equation 34
+\&.EN C
+\&.EQ
+continuation of equation 34
+\&.EN
+.)b
+The
+.b C
+on the
+.b .EN
+request
+specifies that the equation
+will be continued.
+.pp
+The
+.b tbl
+program produces tables.
+It is fully described
+(including numerous examples)
+in the document
+.ul
+Tbl \- A Program to Format Tables
+by M. E. Lesk.
+Tables begin with the
+.b .TS
+request
+and end with the
+.b .TE
+request.
+Tables are normally kept on a single page.
+If you have a table which is too big
+to fit on a single page,
+so that you know it will extend
+to several pages,
+begin the table with the request
+.b ".TS\ H"
+and put the request
+.b .TH
+after the part of the table
+which you want
+duplicated at the top of every page
+that the table is printed on.
+For example, a table definition
+for a long table might look like:
+.ds TA \|\h'.4n'\v'-.2n'\s-4\zT\s0\v'.2n'\h'-.4n'\(ci\|
+.if n .ds TA \ \o'-T'\ \"
+.(b
+\&.TS H
+c s s
+n n n.
+THE TABLE TITLE
+\&.TH
+text of the table
+\&.TE
+.)b
+.pp
+.sh 2 "Two Column Output"
+.pp
+You can get two column output
+automatically
+by using the request
+.b .2c .
+This causes everything after it
+to be output in two-column form.
+The request
+.b .bc
+will start a new column;
+it differs from
+.b .bp
+in that
+.b .bp
+may leave a totally blank column
+when it starts a new page.
+To revert to single column output,
+use
+.b .1c .
+.sh 2 "Defining Macros"
+.pp
+A
+.i macro
+is a collection of requests and text
+which may be used
+by stating a simple request.
+Macros begin with the line
+.b ".de" \ \c
+.i xx
+(where
+.i xx
+is the name of the macro to be defined)
+and end with the line consisting of two dots.
+After defining the macro,
+stating the line
+.b . \c
+.i xx
+is the same as stating all the other lines.
+For example,
+to define a macro
+that spaces 3 lines
+and then centers the next input line,
+enter:
+.(b
+\&.de SS
+\&.sp 3
+\&.ce
+\&..
+.)b
+and use it by typing:
+.(b
+\&.SS
+\&Title Line
+(beginning of text)
+.)b
+.pp
+Macro names may be one or two characters.
+In order to avoid conflicts
+with names in \-me,
+always use upper case letters as names.
+The only names to avoid are
+.b TS ,
+.b TH ,
+.b TE ,
+.b EQ ,
+and
+.b EN .
+.sh 2 "Annotations Inside Keeps"
+.pp
+Sometimes you may want to put
+a footnote
+or index entry inside a keep.
+For example,
+if you want to maintain a
+.q "list of figures"
+you will want to do something like:
+.(b
+\&.(z
+\&.(c
+text of figure
+\&.)c
+\&.ce
+Figure 5.
+\&.(x f
+Figure 5
+\&.)x
+\&.)z
+.)b
+which you may hope
+will give you a figure
+with a label
+and an entry in the index
+.b f
+(presumably a list of figures index).
+Unfortunately,
+the
+index entry
+is read and interpreted
+when the keep is read,
+not when it is printed,
+so the page number in the index is likely to be wrong.
+The solution is to use the magic string
+.b \e!
+at the beginning of all the lines dealing with the index.
+In other words,
+you should use:
+.(b
+\&.(z
+\&.(c
+Text of figure
+\&.)c
+\&.ce
+Figure 5.
+\e!.(x f
+\e!Figure 5
+\e!.)x
+\&.)z
+.)b
+which will defer the processing of the index
+until the figure is output.
+This will guarantee
+that the page number in the index
+is correct.
+The same comments apply
+to
+blocks
+(with
+.b .(b
+and
+.b .)b )
+as well.
+.sh 1 "\*T and the Photosetter"
+.pp
+With a little care,
+you can prepare
+documents that
+will print nicely
+on either a regular terminal
+or when phototypeset
+using the \*T formatting program.
+.sh 2 "Fonts"
+.pp
+A
+.i font
+is a style of type.
+There are three fonts
+that are available simultaneously,
+Times Roman,
+Times Italic,
+and Times Bold,
+plus the special math font.
+The normal font is Roman.
+Text which would be underlined in \*N
+with the
+.b .ul
+request
+is set in italics
+in \*T.
+.pp
+There are ways of switching between fonts.
+The requests
+.b .r ,
+.b .i ,
+and
+.b .b
+switch to Roman,
+italic,
+and bold fonts respectively.
+You can set a single word
+in some font
+by typing (for example):
+.(b
+\&.i word
+.)b
+which will set
+.i word
+in italics
+but does not affect the surrounding text.
+In \*N,
+italic and bold text
+is underlined.
+.pp
+Notice that if you are setting more than one word
+in whatever font,
+you must surround those words with double quote marks
+(`\|"\|')
+so that it will appear to the \*N processor as a single word.
+The quote marks will not appear in the formatted text.
+If you do want a quote mark to appear,
+you should quote the entire string
+(even if a single word),
+and use
+.i two
+quote marks where you want one to appear.
+For example,
+if you want to produce the text:
+.(b
+.i """Master Control\|"""
+.)b
+in italics, you must type:
+.(b
+\&.i """Master Control\e|"""
+.)b
+The
+.b \e|
+produces a very narrow space
+so that the
+.q l
+does not overlap the quote sign in \*T,
+like this:
+.(b
+.i """Master Control"""
+.)b
+.pp
+There are also several
+.q pseudo-fonts
+available.
+The input:
+.(b
+\&.(b
+\&.u underlined
+\&.bi "bold italics"
+\&.bx "words in a box"
+\&.)b
+.)b
+generates
+.(b
+.u underlined
+.bi "bold italics"
+.bx "words in a box"
+.)b
+In \*N these all just underline
+the text.
+Notice that pseudo font requests
+set only the single parameter in the pseudo font;
+ordinary font requests will begin setting all text
+in the special font
+if you do not provide a parameter.
+No more than one word
+should appear
+with these three font requests
+in the middle of lines.
+This is because
+of the way \*T justifies text.
+For example,
+if you were to issue the requests:
+.(b
+\&.bi "some bold italics"
+and
+\&.bx "words in a box"
+.)b
+in the middle of a line
+\*T would produce
+.bi "some bold italics"
+and
+.bx "words in a box" ,\c
+.if t \p
+.if n \& \"
+.if t which I think you will agree does not look good.
+.if n which would look really lousy in \*T.
+.pp
+The second parameter
+of all font requests
+is set in the original font.
+For example,
+the font request:
+.(b
+\&.b bold face
+.)b
+generates
+.q bold
+in bold font,
+but sets
+.q face
+in the font of the surrounding text,
+resulting in:
+.(b
+.b bold face.
+.)b
+To set the two words
+.b bold
+and
+.b face
+both in
+.b "bold face" ,
+type:
+.(b
+\&.b "bold face"
+.)b
+.pp
+You can mix fonts in a word by using the
+special sequence
+.b \ec
+at the end of a line
+to indicate
+.q "continue text processing" ;
+this allows input lines
+to be joined together
+without a space between them.
+For example, the input:
+.(b
+\&.u under \ec
+\&.i italics
+.)b
+generates
+.u under \c
+.i italics ,
+but if we had typed:
+.(b
+\&.u under
+\&.i italics
+.)b
+the result would have been
+.u under
+.i italics
+as two words.
+.sh 2 "Point Sizes"
+.pp
+The phototypesetter
+supports different sizes of type,
+measured in points.
+The default point size
+is 10 points
+for most text,
+8 points for footnotes.
+To change the pointsize,
+type:
+.(b
+\&.sz \c
+.i +N
+.)b
+where
+.i N
+is the size wanted in points.
+The
+.i "vertical spacing"
+(distance between the bottom of most letters
+(the
+.i baseline )
+between adjacent lines)
+is set to be proportional
+to the type size.
+.pp
+These pointsize changes are
+.i temporary !!!
+For example,
+to reset the pointsize of basic text to twelve point, use:
+.(b
+\&.nr pp 12
+\&.nr sp 12
+\&.nr tp 12
+.)b
+to reset the default pointsize of
+paragraphs,
+section headers,
+and titles respectively.
+If you only want to set the names of sections in a larger pointsize,
+use:
+.(b
+\&.nr sp 11
+.)b
+alone \*- this sets section titles
+(e.g.,
+.b "Point Sizes"
+above)
+in a larger font than the default.
+.pp
+A single word or phrase can be set in a smaller pointsize
+than the surrounding text
+using the
+.b .sm
+request.
+This is especially convenient for words that are all capitals,
+due to the optical illusion that makes them look even larger
+than they actually are.
+For example:
+.(b
+\&.sm UNIX
+.)b
+prints as
+.sm UNIX
+rather than
+UNIX.
+.pp
+Warning:
+changing point sizes
+on the phototypesetter
+is a slow mechanical operation.
+On laser printers it may require loading new fonts.
+Size changes
+should be considered carefully.
+.sh 2 "Quotes"
+.pp
+It is conventional when using
+the typesetter to
+use pairs of grave and acute accents
+to generate double quotes,
+rather than the
+double quote character
+(`\|"\|').
+This is because it looks better
+to use grave and acute accents;
+for example, compare
+"quote" to
+``quote''.
+.pp
+In order to make quotes compatible
+between the typesetter and terminals,
+you may use the sequences
+.b \e*(lq
+and
+.b \e*(rq
+to stand for the left and right quote
+respectively.
+These both appear as
+.b """"
+on most terminals,
+but are typeset as
+.b ``
+and
+.b ''
+respectively.
+For example,
+use:
+.(b
+\e*(lqSome things aren\(aat true
+even if they did happen.\e*(rq
+.)b
+to generate the result:
+.(b
+.q "Some things aren't true even if they did happen."
+.)b
+As a shorthand,
+the special font request:
+.(b
+\&.q "quoted text"
+.)b
+will generate
+.q "quoted text" .
+Notice that you must surround
+the material to be quoted
+with double quote marks
+if it is more than one word.
+.sh 0
+.sp 1i
+.b Acknowledgments
+.pp
+I would like to thank
+Bob Epstein,
+Bill Joy,
+and Larry Rowe
+for having the courage
+to use the \-me macros
+to produce non-trivial papers
+during the development stages;
+Ricki Blau,
+Pamela Humphrey,
+and Jim Joyce
+for their help with the documentation phase;
+peter kessler
+for numerous complaints years after I was
+.q done
+with this project,
+most accompanied by fixes
+(hence forcing me to fix several small bugs);
+and the plethora of people who have contributed ideas
+and have given support for the project.
+.sp 1i
+This document was
+.if n \*N'ed
+.if t \*T'ed
+on \*(td
+and applies to version
+\*(MO
+of the \-me macros.
diff --git a/share/doc/usd/20.meref/Makefile b/share/doc/usd/20.meref/Makefile
new file mode 100644
index 0000000..7307e80
--- /dev/null
+++ b/share/doc/usd/20.meref/Makefile
@@ -0,0 +1,7 @@
+# @(#)Makefile 8.1 (Berkeley) 6/8/93
+
+DIR= usd/20.meref
+SRCS= ref.me
+MACROS= -me
+
+.include <bsd.doc.mk>
diff --git a/share/doc/usd/20.meref/ref.me b/share/doc/usd/20.meref/ref.me
new file mode 100644
index 0000000..66d0060
--- /dev/null
+++ b/share/doc/usd/20.meref/ref.me
@@ -0,0 +1,2384 @@
+.\" Copyright (c) 1986, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)ref.me 8.1 (Berkeley) 6/8/93
+.\"
+.UC 7
+.ll 6.5i
+.lt 6.5i
+.pn 0
+.ds MO 2.27\" \" mod number
+.de TL \" *** title line
+.lp
+.di XX
+..
+.de DE \" *** description
+\\\\h'|\\n(DIu'\\\\c
+.br
+.di
+.in +\\n(DIu
+.ti 0
+.cu 1000
+.XX
+.rm XX
+.cu 0
+..
+.ds N \s-1NROFF\s0
+.ds T \s-1TROFF\s0
+.nr DI 1.5i
+\".he '\-ME REFERENCE MANUAL''%'
+.de NR
+.b "\en\\$1" "\\$2"
+..
+.de ST
+.b "\e*\\$1" "\\$2"
+..
+.sc
+.eh 'USD:20-%''\-me Reference Manual'
+.oh '\-me Reference Manual''USD:20-%'
+.+c
+.ce 20
+.sz 14
+.b "\-ME REFERENCE MANUAL"
+.sz
+.sp
+.i "Release \*(MO"
+.sp 2
+.ul
+Eric P. Allman*
+.(f
+*Author's current address:
+Computer Science Division, EECS,
+University of California,
+Berkeley, California 94720.
+.)f
+.sp
+Project INGRES
+Electronics Research Laboratory
+University of California, Berkeley
+Berkeley, California 94720
+.ce 0
+.sp 4
+.pp
+This document describes
+in extremely terse form
+the features
+of the
+.b \-me
+macro package
+for version seven \*N/\*T\*(dg.
+.(f
+\(dg\*N and \*T may be trademarks of AT&T Bell Laboratories.
+.)f
+Some familiarity is assumed
+with
+those programs.
+Specifically,
+the reader should understand
+breaks,
+fonts,
+pointsizes,
+the use and definition of number registers
+and strings,
+how to define macros,
+and scaling factors for ens, points,
+.b v 's
+(vertical line spaces),
+etc.
+.pp
+For a more casual introduction
+to text processing
+using \*N,
+refer to the document
+.ul
+Writing Papers with \*N using \-me.
+.pp
+There are a number of macro parameters
+that may be adjusted.
+Fonts may be set to a font number only.
+Font 8 means bold font in \*T;
+in \*N font 8
+is underlined
+unless the
+.b \-rb3
+flag is specified to use
+.q "true bold"
+font
+(most versions of \*N do not interpret bold font nicely).
+Font 0 is no font change;
+the font of the surrounding text
+is used instead.
+Notice that fonts 0 and 8 are
+.q pseudo-fonts ;
+that is,
+they are simulated by the macros.
+This means that although it is legal to set a font register
+to zero or eight,
+it is not legal to use the escape character form,
+such as:
+.(b
+\ef8
+.)b
+.pp
+All distances
+are in basic units,
+so it is nearly always necessary
+to use a scaling factor.
+For example,
+the request
+to set the paragraph indent
+to eight one-en spaces is:
+.(b
+\&.nr pi 8n
+.)b
+and not
+.(b
+\&.nr pi 8
+.)b
+which would set the paragraph indent to eight basic units,
+or about 0.02 inch.
+Default parameter values are given in brackets
+in the remainder of this document.
+.pp
+Registers and strings
+of the form
+.b $ \c
+.i x
+may be used in expressions
+but should not be changed.
+Macros of the form
+.b $ \c
+.i x
+perform some function
+(as described)
+and may be redefined
+to change this function.
+This may be a sensitive operation;
+look at the body of the original macro
+before changing it.
+.pp
+All names in \-me
+follow a rigid naming convention.
+The user may define number registers,
+strings,
+and macros,
+provided that s/he
+uses single character upper case names
+or double character names
+consisting of letters and digits,
+with at least one upper case letter.
+In no case should special characters
+be used in user-defined names.
+.pp
+On daisy wheel type printers
+in twelve pitch,
+the
+.b \-rx1
+flag can be stated to make lines default to
+one eighth inch
+(the normal spacing for a newline in twelve-pitch).
+This is normally too small for easy readability,
+so the default is to space one sixth inch.
+.pp
+The
+.b \-rv2
+flag indicates that this
+.i is
+being output on a C/A/T
+phototypesetter;
+this changes the page offset
+and inserts cut marks.
+.pp
+This documentation was
+.if n \*N'ed
+.if t \*T'ed
+on \*(td
+and applies to version
+\*(MO
+of the \-me macros.
+.sh 1 "Paragraphing"
+.pp
+These macros are used
+to begin paragraphs.
+The standard paragraph macro
+is
+.b .pp ;
+the others are all variants
+to be used for special purposes.
+.pp
+The first call to one of the paragraphing macros
+defined in this section
+or the
+.b .sh
+macro
+(defined in the next section)
+.i initializes
+the macro processor.
+After initialization
+it is not possible to use any of the following requests:
+.b .sc ,
+.b .lo ,
+.b .th ,
+or
+.b .ac .
+Also,
+the effects of changing parameters
+which will have a global effect
+on the format of the page
+(notably page length and header and footer margins)
+are not well defined
+and should be avoided.
+.TL
+.b .lp
+.DE
+Begin left-justified paragraph.
+Centering and underlining
+are turned off if they were on,
+the font is set to
+.NR (pf
+[1],
+the type size
+is set to
+.NR (pp
+[10p],
+and a
+.NR (ps
+space is inserted
+before the paragraph
+[0.35v in \*T, 1v or 0.5v in \*N
+depending on device resolution].
+The indent is reset
+to
+.NR ($i
+[0]
+plus
+.NR (po
+[0]
+unless the paragraph
+is inside a display
+(see
+.b .ba ).
+At least
+the first two lines
+of the paragraph
+are kept together
+on a page.
+.TL
+.b .pp
+.DE
+Like
+.b .lp ,
+except that it puts
+.NR (pi
+[5n]
+units of indent.
+This is the standard paragraph macro.
+.TL
+.b .ip
+.i T
+.i I
+.DE
+Indented paragraph
+with hanging tag.
+The body of the following paragraph
+is indented
+.i I
+spaces
+(or
+.NR (ii
+[5n]
+spaces
+if
+.i I
+is not specified)
+more than a non-indented paragraph
+(such as with
+.b .pp )
+is.
+The title
+.i T
+is exdented (opposite of indented).
+The result is a paragraph
+with an even left edge
+and
+.i T
+printed in the margin.
+Any spaces in
+.i T
+must be unpaddable.
+If
+.i T
+will not fit in the space provided,
+.b .ip
+will start a new line.
+.TL
+.b .np
+.DE
+A variant of .ip which numbers paragraphs.
+Numbering is reset
+after a
+.b .lp ,
+.b .pp ,
+or
+.b .sh .
+The current paragraph number
+is in
+.NR ($p .
+.TL
+.b .bu
+.DE
+Like
+.b .np
+except that paragraphs are marked with bullets (\(bu).
+Leading space is eliminated to create compact lists.
+.sh 1 "Section Headings"
+.pp
+Numbered sections
+are similar to paragraphs
+except that a
+section number
+is automatically
+generated for each one.
+The section numbers are of the form
+.b 1.2.3 .
+The
+.i depth
+of the section
+is the count of numbers
+(separated by decimal points)
+in the section number.
+.pp
+Unnumbered section headings are similar,
+except that no number is attached
+to the heading.
+.TL
+.b .sh
+.i +N
+.i T
+.i "a b c d e f"
+.DE
+Begin numbered section
+of depth
+.i N .
+If
+.i N
+is missing
+the current depth
+(maintained in
+the number register
+.NR ($0 )
+is used.
+The values of
+the individual parts of the section number
+are maintained in
+.NR ($1
+through
+.NR ($6 .
+There is a
+.NR (ss
+[1v]
+space before the section.
+.i T
+is printed
+as a section title
+in font
+.NR (sf
+[8]
+and size
+.NR (sp
+[10p].
+The
+.q name
+of the section may be accessed via
+.ST ($n .
+If
+.NR (si
+is non-zero,
+the base indent
+is set to
+.NR (si
+times the section depth,
+and the section title
+is exdented.
+(See
+.b .ba .)
+Also,
+an additional indent of
+.NR (so
+[0]
+is added to the section title
+(but not to the body of the section).
+The font is then set
+to the paragraph font,
+so that more information may occur
+on the line
+with the section number
+and title.
+.b .sh
+ensures that there is enough room
+to print the section head
+plus the beginning of a paragraph
+(about 3 lines total).
+If
+.i a
+through
+.i f
+are specified,
+the section number is set to that number
+rather than incremented automatically.
+If any of
+.i a
+through
+.i f
+are a hyphen
+that number is not reset.
+If
+.i T
+is a single underscore
+(\c
+.q _ )
+then the section depth and numbering are reset,
+but the base indent is not reset
+and nothing is printed out.
+This is useful to automatically
+coordinate section numbers with
+chapter numbers.
+.TL
+.b .sx
+.i +N
+.DE
+Go to section depth
+.i N
+[\c
+.b \-1 ],
+but do not print the number
+and title,
+and do not increment the section number
+at level
+.i N .
+This has the effect
+of starting a new paragraph
+at level
+.i N .
+.TL
+.b .uh
+.i T
+.DE
+Unnumbered section heading.
+The title
+.i T
+is printed
+with the same rules for spacing,
+font, etc.,
+as for
+.b .sh .
+.TL
+.b .$p
+.i T
+.i B
+.i N
+.DE
+Print section heading.
+May be redefined
+to get fancier headings.
+.i T
+is the title passed on the
+.b .sh
+or
+.b .uh
+line;
+.i B
+is the section number for this section,
+and
+.i N
+is the depth of this section.
+These parameters are not always present;
+in particular,
+.b .sh
+passes all three,
+.b .uh
+passes only the first,
+and
+.b .sx
+passes three,
+but the first two
+are null strings.
+Care should be taken if this macro
+is redefined;
+it is quite complex and subtle.
+.TL
+.b .$0
+.i T
+.i B
+.i N
+.DE
+This macro is called automatically
+after every call to
+.b .$p .
+It is normally undefined,
+but may be used
+to automatically put
+every section title
+into the table of contents
+or for some similar function.
+.i T
+is the section title
+for the section which was just printed,
+.i B
+is the section number,
+and
+.i N
+is the section depth.
+.TL
+.b .$1
+\-
+.b .$6
+.DE
+Traps called just before printing that depth section.
+May be defined to
+(for example)
+give variable spacing
+before sections.
+These macros are called from
+.b .$p ,
+so if you redefine that macro
+you may lose this feature.
+.sh 1 "Headers and Footers"
+.ds TP \fI\(aal\|\(aam\^\(aar\^\(aa\fP
+.pp
+Headers and footers
+are put at the top and bottom
+of every page
+automatically.
+They are set in font
+.NR (tf
+[3]
+and size
+.NR (tp
+[10p].
+Each of the definitions
+applies as of the
+.i next
+page.
+Three-part titles
+must be quoted
+if there are two blanks adjacent
+anywhere in the title
+or more than eight blanks total.
+.pp
+The spacing
+of headers and footers
+is controlled by four number registers.
+.NR (hm
+[4v]
+is the distance from the top of the page
+to the top of the header,
+.NR (fm
+[3v]
+is the distance from the bottom of the page
+to the bottom of the footer,
+.NR (tm
+[7v]
+is the distance from the top of the page
+to the top of the text,
+and
+.NR (bm
+[6v]
+is the distance from the bottom of the page
+to the bottom of the text
+(nominal).
+The macros
+.b .m1 ,
+.b .m2 ,
+.b .m3 ,
+and
+.b .m4
+are also supplied for compatibility
+with
+\s-1ROFF\s0 documents.
+.TL
+.b .he
+\*(TP
+.DE
+Define three-part header,
+to be printed on the top
+of every page.
+.TL
+.b .fo
+\*(TP
+.DE
+Define footer,
+to be printed at the bottom
+of every page.
+.TL
+.b .eh
+\*(TP
+.DE
+Define header,
+to be printed at the top of every
+even-numbered page.
+.TL
+.b .oh
+\*(TP
+.DE
+Define header,
+to be printed at the top of every
+odd-numbered page.
+.TL
+.b .ef
+\*(TP
+.DE
+Define footer,
+to be printed at the bottom
+of every even-numbered page.
+.TL
+.b .of
+\*(TP
+.DE
+Define footer,
+to be printed at the bottom
+of every odd-numbered page.
+.TL
+.b .hx
+.DE
+Suppress headers and footers
+on the next page.
+.TL
+.b .m1
+.i +N
+.DE
+Set the space between the top of the page
+and the header
+[4v].
+.TL
+.b .m2
+.i +N
+.DE
+Set the space between the header
+and the first line of text
+[2v].
+.TL
+.b .m3
+.i +N
+.DE
+Set the space
+between the bottom of the text
+and the footer
+[2v].
+.TL
+.b .m4
+.i +N
+.DE
+Set the space
+between the footer
+and the bottom of the page
+[4v].
+.TL
+.b .ep
+.DE
+End this page,
+but do not begin the next page.
+Useful for forcing out footnotes,
+but other than
+that hardly ever used.
+Must be followed by a
+.b .bp
+or the end of input.
+.TL
+.b .$h
+.DE
+Called at every page
+to print the header.
+May be redefined
+to provide fancy
+(e.g.,
+multi-line)
+headers,
+but doing so
+loses the function of the
+.b .he ,
+.b .fo ,
+.b .eh ,
+.b .oh ,
+.b .ef ,
+and
+.b .of
+requests,
+as well as the chapter-style title feature
+of
+.b .+c .
+.TL
+.b .$f
+.DE
+Print footer;
+same comments apply
+as in
+.b .$h .
+.TL
+.b .$H
+.DE
+A normally undefined macro
+which is called
+at the top of each page
+(after putting out
+the header,
+initial saved floating keeps,
+etc.);
+in other words,
+this macro is called immediately before
+printing text
+on a page.
+It can be used for column headings
+and the like.
+.sh 1 "Displays"
+.pp
+All displays except centered blocks
+and block quotes
+are preceded and followed
+by an extra
+.NR (bs
+[same as
+.NR (ps ]
+space.
+Quote spacing is stored in a separate register;
+centered blocks have no default initial or trailing space.
+The vertical spacing of all displays except quotes
+and centered blocks
+is stored in register
+.NR ($R
+instead of
+.NR ($r .
+.TL
+.b .(l
+.i m
+.i f
+.DE
+Begin list.
+Lists are single spaced,
+unfilled text.
+If
+.i f
+is
+.b F ,
+the list will be filled.
+If
+.i m
+[\c
+.b I ]
+is
+.b I
+the list is indented by
+.NR (bi
+[4m];
+if
+.b M
+the list is indented to the left margin;
+if
+.b L
+the list is left justified with respect to the text
+(different from
+.b M
+only if the base indent
+(stored in
+.NR ($i
+and set with
+.b .ba )
+is not zero);
+and if
+.b C
+the list is centered on a line-by-line basis.
+The list is set in font
+.NR (df
+[0].
+Must be matched by a
+.b .)l .
+This macro is almost like
+.b .(b
+except that no attempt is made
+to keep the display on one page.
+.TL
+.b .)l
+.DE
+End list.
+.TL
+.b .(q
+.DE
+Begin major quote.
+These are single spaced,
+filled,
+moved in from the text
+on both sides
+by
+.NR (qi
+[4n],
+preceded and followed
+by
+.NR (qs
+[same as
+.NR (bs ]
+space,
+and are set in point size
+.NR (qp
+[one point smaller than surrounding text].
+.TL
+.b .)q
+.DE
+End major quote.
+.TL
+.b .(b
+.i m
+.i f
+.DE
+Begin block.
+Blocks are a form of
+.i keep ,
+where the text of a keep
+is kept together on one page
+if possible
+(keeps are useful
+for tables and figures
+which should not be broken
+over a page).
+If the block will not fit
+on the current page
+a new page is begun,
+.i unless
+that would leave more than
+.NR (bt
+[0]
+white space
+at the bottom of the text.
+If
+.NR (bt
+is zero, the threshold feature
+is turned off.
+Blocks are not filled
+unless
+.i f
+is
+.b F ,
+when they are filled.
+The block will be left-justified
+if
+.i m
+is
+.b L ,
+indented by
+.NR (bi
+[4m]
+if
+.i m
+is
+.b I
+or absent,
+centered
+(line-for-line)
+if
+.i m
+is
+.b C ,
+and left justified to the margin
+(not to the base indent)
+if
+.i m
+is
+.b M .
+The block is set in font
+.NR (df
+[0].
+.TL
+.b .)b
+.DE
+End block.
+.TL
+.b .(z
+.i m
+.i f
+.DE
+Begin floating keep.
+Like
+.b .(b
+except that the keep is
+.i floated
+to the bottom of the page
+or the top of the next page.
+Therefore,
+its position relative to the text changes.
+The floating keep is preceded and followed
+by
+.NR (zs
+[1v]
+space.
+Also,
+it defaults to mode
+.b M .
+.TL
+.b .)z
+.DE
+End floating keep.
+.TL
+.b .(c
+.DE
+Begin centered block.
+The next keep
+is centered as a block,
+rather than on a line-by-line basis
+as with
+.b ".(b C" .
+This call may be nested
+inside keeps.
+.TL
+.b .)c
+.DE
+End centered block.
+.sh 1 Annotations
+.TL
+.b .(d
+.DE
+Begin delayed text.
+Everything in the next keep
+is saved for output
+later with
+.b .pd ,
+in a manner
+similar to footnotes.
+.TL
+.b .)d
+.i n
+.DE
+End delayed text.
+The delayed text number register
+.NR ($d
+and the associated string
+.ST #
+are incremented if
+.ST #
+has been referenced.
+.TL
+.b .pd
+.DE
+Print delayed text.
+Everything diverted via
+.b .(d
+is printed and truncated.
+This might be used
+at the end of each chapter.
+.TL
+.b .(f
+.DE
+Begin footnote.
+The text of the footnote
+is floated to the bottom
+of the page
+and set in font
+.NR (ff
+[1]
+and size
+.NR (fp
+[8p].
+Each entry
+is preceded by
+.NR (fs
+[0.2v]
+space,
+is indented
+.NR (fi
+[3n]
+on the first line,
+and is indented
+.NR (fu
+[0]
+from the right margin.
+Footnotes line up underneath
+two column output.
+If the text of the footnote
+will not all fit on one page
+it will be carried over
+to the next page.
+.TL
+.b .)f
+.i n
+.DE
+End footnote.
+The number register
+.NR ($f
+and the associated string
+.ST *
+are incremented
+if they have been referenced.
+.TL
+.b .$s
+.DE
+The macro to output the footnote separator.
+This macro may be redefined
+to give other size lines or other types
+of separators.
+Currently
+it draws a 1.5i line.
+.TL
+.b .(x
+.i x
+.DE
+Begin index entry.
+Index entries are saved in the index
+.i x
+[\c
+.b x ]
+until called up with
+.b .xp .
+Each entry is preceded
+by a
+.NR (xs
+[0.2v]
+space.
+Each entry is
+.q undented
+by
+.NR (xu
+[0.5i];
+this register tells how far the page number
+extends into the right margin.
+.TL
+.b .)x
+.i P
+.i A
+.DE
+End index entry.
+The index entry
+is finished with a row of dots
+with
+.i A
+[null]
+right justified on the last line
+(such as for an author's name),
+followed by P
+[\c
+.NR % ].
+If
+.i A
+is specified,
+.i P
+must be specified;
+.NR %
+can be used to print the current page number.
+If
+.i P
+is an underscore,
+no page number
+and no row of dots
+are printed.
+.TL
+.b .xp
+.i x
+.DE
+Print index
+.i x
+[\c
+.b x ].
+The index is formatted in the font, size, and so forth
+in effect at the time it is printed,
+rather than at the time it is collected.
+.sh 1 "Columned Output"
+.TL
+.b .2c
+.i +S
+.i N
+.DE
+Enter two-column mode.
+The column separation is set to
+.i +S
+[4n, 0.5i in ACM mode]
+(saved in
+.NR ($s ).
+The column width,
+calculated to fill the single column line length
+with both columns,
+is stored in
+.NR ($l .
+The current column
+is in
+.NR ($c .
+You can test register
+.NR ($m
+[1]
+to see if you are in single column
+or double column mode.
+Actually,
+the request enters
+.i N
+[2]
+column output.
+.TL
+.b .1c
+.DE
+Revert to single-column mode.
+.TL
+.b .bc
+.DE
+Begin column.
+This is like
+.b .bp
+except that it begins a new column
+on a new page
+only if necessary,
+rather than forcing a whole new page
+if there is another column left
+on the current page.
+.sh 1 "Fonts and Sizes"
+.TL
+.b .sz
+.i +P
+.DE
+The pointsize is set to
+.i P
+[10p],
+and the line spacing is set proportionally.
+The ratio of line spacing to pointsize
+is stored in
+.NR ($r .
+The ratio used internally
+by displays and annotations
+is stored in
+.NR ($R
+(although this is not used by
+.b .sz ).
+This size is
+.i not
+sticky beyond many macros:
+in particular,
+.NR (pp
+(paragraph pointsize)
+modifies the pointsize every time a new paragraph is begun
+using the
+.b \&.pp ,
+.b \&.lp ,
+.b \&.ip ,
+.b \&.np ,
+or
+.b \&.bu
+macros.
+Also,
+.NR (fp
+(footnote pointsize),
+.NR (qp
+(quote pointsize),
+.NR (sp
+(section header pointsize),
+and
+.NR (tp
+(title pointsize)
+may modify the pointsize.
+.TL
+.b .r
+.i W
+.i X
+.DE
+Set
+.i W
+in roman font,
+appending
+.i X
+in the previous font.
+To append different font requests,
+use
+.i X
+=
+.b \ec .
+If no parameters,
+change to roman font.
+.TL
+.b .i
+.i W
+.i X
+.DE
+Set
+.i W
+in italics,
+appending
+.i X
+in the previous font.
+If no parameters,
+change to italic font.
+Underlines in \*N.
+.TL
+.b .b
+.i W
+.i X
+.DE
+Set
+.i W
+in bold font
+and append
+.i X
+in the previous font.
+If no parameters,
+switch to bold font.
+In \*N,
+underlines.
+.TL
+.b .rb
+.i W
+.i X
+.DE
+Set
+.i W
+in bold font
+and append
+.i X
+in the previous font.
+If no parameters,
+switch to bold font.
+.b .rb
+differs from
+.b .b
+in that
+.b .rb
+does not underline in \*N.
+.TL
+.b .u
+.i W
+.i X
+.DE
+Underline
+.i W
+and append
+.i X .
+This is a true underlining,
+as opposed to the
+.b .ul
+request,
+which changes to
+.q "underline font"
+(usually italics in \*T).
+It won't work right
+if
+.i W
+is spread or broken (including hyphenated).
+In other words,
+it is safe in nofill mode only.
+.TL
+.b .q
+.i W
+.i X
+.DE
+Quote
+.i W
+and append
+.i X .
+In \*N
+this just surrounds
+.i W
+with double quote marks
+(`\|\c
+.b """" \|'),
+but in \*T
+uses directed quotes.
+.TL
+.b .bi
+.i W
+.i X
+.DE
+Set
+.i W
+in bold italics
+and append
+.i X .
+Actually,
+sets
+.i W
+in italic
+and overstrikes once.
+Underlines in \*N.
+It won't work right
+if
+.i W
+is spread or broken (including hyphenated).
+In other words,
+it is safe in nofill mode only.
+.TL
+.b .bx
+.i W
+.i X
+.DE
+Sets
+.i W
+in a box,
+with
+.i X
+appended.
+Underlines in \*N.
+It won't work right
+if
+.i W
+is spread or broken (including hyphenated).
+In other words,
+it is safe in nofill mode only.
+.TL
+.b .sm
+.i W
+.i X
+.DE
+Sets
+.i W
+in a smaller pointsize,
+with
+.i X
+appended.
+.sh 1 "Roff Support"
+.TL
+.b .ix
+.i +N
+.DE
+Indent,
+no break.
+Equivalent to
+.b \(aain
+.i N .
+.TL
+.b .bl
+.i N
+.DE
+Leave
+.i N
+contiguous white space,
+on the next page if not enough room
+on this page.
+Equivalent to a
+.b .sp
+.i N
+inside a block.
+.TL
+.b .pa
+.i +N
+.DE
+Equivalent to
+.b .bp .
+.TL
+.b .ro
+.DE
+Set page number
+in roman numerals.
+Equivalent to
+.b ".af % i" .
+.TL
+.b .ar
+.DE
+Set page number in Arabic.
+Equivalent to
+.b ".af % 1" .
+.TL
+.b .n1
+.DE
+Number lines in margin from one
+on each page.
+.TL
+.b .n2
+.i N
+.DE
+Number lines from
+.i N ,
+stop if
+.i N
+= 0.
+.TL
+.b .sk
+.DE
+Leave the next output page blank,
+except for headers and footers.
+This is used to leave space
+for a full-page diagram
+which is produced externally
+and pasted in later.
+To get a partial-page paste-in display,
+say
+.b .sv \ \c
+.i N ,
+where
+.i N
+is the amount of space
+to leave;
+this space will be output immediately
+if there is room,
+and will otherwise be output
+at the top of the next page.
+However, be warned:
+if
+.i N
+is greater than the amount of available space
+on an empty page,
+no space will ever be output.
+.sh 1 "Preprocessor Support"
+.TL
+.b .EQ
+.i m
+.i T
+.DE
+Begin equation.
+The equation is centered
+if
+.i m
+is
+.b C
+or omitted,
+indented
+.NR (bi
+[4m]
+if
+.i m
+is
+.b I ,
+and left justified if
+.i m
+is
+.b L .
+.i T
+is a title printed on the right margin
+next to the equation.
+See
+.i "Typesetting Mathematics \- User's Guide"
+by Brian W. Kernighan
+and Lorinda L. Cherry.
+.TL
+.b .EN
+.i c
+.DE
+End equation.
+If
+.i c
+is
+.b C
+the equation must be continued
+by immediately following
+with another
+.b .EQ ,
+the text of which
+can be centered
+along with this one.
+Otherwise,
+the equation is printed,
+always on one page,
+with
+.NR (es
+[0.5v in \*T, 1v in \*N]
+space
+above and below it.
+.TL
+.b .TS
+.i h
+.DE
+Table start.
+Tables are single spaced
+and kept on one page
+if possible.
+If you have a large table
+which will not fit on one page,
+use
+.i h
+=
+.b H
+and follow the header part
+(to be printed on every page of the table)
+with a
+.b .TH .
+See
+.i "Tbl \- A Program to Format Tables"
+by M. E. Lesk.
+.TL
+.b .TH
+.DE
+With
+.b ".TS H" ,
+ends the header portion of the table.
+.TL
+.b .TE
+.DE
+Table end.
+Note that this table
+does not float;
+in fact,
+it is not even guaranteed to stay on one page
+if you use requests such as
+.b .sp
+intermixed with the text
+of the table.
+If you want it to float
+(or if you use requests
+inside the table),
+surround the entire table
+(including the
+.b .TS
+and
+.b .TE
+requests)
+with the requests
+.b .(z
+and
+.b .)z .
+.TL
+.b .PS
+.i h
+.i w
+.DE
+Begin
+.i pic
+picture.
+.i h
+is the height and
+.i w
+is the width,
+both in basic units.
+.i Ditroff
+only.
+.TL
+.b .PE
+.DE
+End picture.
+.TL
+.b .IS
+.DE
+Begin
+.i ideal
+picture.
+.TL
+.b .IE
+.DE
+End
+.i ideal
+picture.
+.TL
+.b .IF
+.DE
+End
+.i ideal
+picture (alternate form).
+.TL
+.b .GS
+.DE
+Begin
+.i gremlin
+picture.
+.TL
+.b .GE
+.DE
+End
+.i gremlin
+picture.
+.TL
+.b .GF
+.DE
+End
+.i gremlin
+picture (alternate form).
+.sh 1 "Miscellaneous"
+.TL
+.b .re
+.DE
+Reset tabs.
+Set to every 0.5i
+in \*T
+and every 0.8i in \*N.
+.TL
+.b .ba
+.i +N
+.DE
+Set the base indent
+to
+.i +N
+[0]
+(saved in
+.NR ($i ).
+All paragraphs,
+sections,
+and displays
+come out indented by this amount.
+Titles and footnotes
+are unaffected.
+The
+.b .sh
+request performs a
+.b .ba
+request
+if
+.NR (si
+[0] is not zero,
+and sets the base indent to
+.NR (si \c
+.b * \c
+.NR ($0 .
+.TL
+.b .xl
+.i +N
+.DE
+Set the line length to
+.i N
+[6.0i].
+This differs
+from
+.b .ll
+because it only affects the current environment.
+.TL
+.b .ll
+.i +N
+.DE
+Set line length in all environments
+to
+.i N
+[6.0i].
+This should not be used
+after output has begun,
+and particularly not in two-column output.
+The current line length is stored in
+.NR ($l .
+.TL
+.b .hl
+.DE
+Draws a horizontal line
+the length of the page.
+This is useful
+inside floating keeps
+to differentiate
+between the text
+and the figure.
+.TL
+.b .lh
+.DE
+Print a letterhead at the current position on the page.
+The format of the letterhead must be defined
+in the file
+.b /usr/lib/me/letterhead.me
+by your local systems staff.
+Some environments may require
+.i ditroff
+for this macro
+to function properly.
+.TL
+.b .lo
+.DE
+This macro loads another set of macros
+(in
+.b /usr/lib/me/local.me )
+which is intended to be a set of locally defined macros.
+These macros
+should all be of the form
+.b .* \c
+.i X ,
+where
+.i X
+is any letter
+(upper or lower case)
+or digit.
+.sh 1 "Standard Papers"
+.TL
+.b .tp
+.DE
+Begin title page.
+Spacing at the top of the page
+can occur,
+and headers and footers are suppressed.
+Also,
+the page number
+is not incremented
+for this page.
+.TL
+.b .th
+.DE
+Set thesis mode.
+This defines the modes acceptable
+for a doctoral dissertation
+at Berkeley.
+It double spaces,
+defines the header
+to be a single page number,
+and changes the margins
+to be 1.5 inch on the left
+and one inch on the top.
+.b .++
+and
+.b .+c
+should be used with it.
+This macro must be stated
+before
+initialization,
+that is,
+before the first call of a paragraphing
+macro
+or
+.b .sh .
+.TL
+.b .++
+.i m
+.i H
+.DE
+This request defines the section of the paper
+which we are entering.
+The section type is defined by
+.i m .
+.b C
+means that we are entering the chapter portion
+of the paper,
+.b A
+means that we are entering the appendix portion
+of the paper,
+.b P
+means that the material following
+should be the preliminary portion
+(abstract, table of contents, etc.)
+of the paper,
+.b AB
+means that we are entering the abstract
+(numbered independently from 1
+in Arabic numerals),
+and
+.b B
+means that we are entering the bibliographic
+portion at the end of the paper.
+Also, the variants
+.b RC
+and
+.b RA
+are allowed,
+which specify renumbering of pages
+from one at the beginning of each
+chapter or appendix,
+respectively.
+The
+.i H
+parameter defines the new header.
+If there are any spaces in it,
+the entire header must be quoted.
+If you want the header to have the chapter number
+in it,
+use the string
+.b "\e\e\e\en(ch" .
+For example, to number appendixes
+.b A.1
+etc.,
+type
+.b ".++ RA \(aa\(aa\(aa\e\e\e\en(ch.%\(aa" .
+Each section
+(chapter, appendix, etc.)
+should be preceded by the
+.b .+c
+request.
+It should be mentioned
+that it is easier when using
+\*T to put the front material
+at the end of the paper,
+so that the table of contents
+can be collected and put out;
+this material can then be physically
+moved to the beginning of the paper.
+.TL
+.b .+c
+.i T
+.DE
+Begin chapter with title
+.i T .
+The chapter number
+is maintained in
+.NR (ch .
+This register is incremented
+every time
+.b .+c
+is called with a parameter.
+The title and chapter number
+are printed by
+.b .$c .
+The header is moved to the footer
+on the first page
+of each chapter.
+If
+.i T
+is omitted,
+.b .$c
+is not called;
+this is useful for doing your own
+.q "title page"
+at the beginning of papers
+without a title page proper.
+.b .$c
+calls
+.b .$C
+as a hook so that chapter titles can be inserted
+into a table of contents automatically.
+The footnote numbering is reset to one.
+.TL
+.b .$c
+.i T
+.DE
+Print chapter number
+(from
+.NR (ch )
+and
+.i T .
+This macro can be redefined to your liking.
+It is defined by default
+to be acceptable
+for a PhD thesis
+at Berkeley.
+This macro calls
+.b .$C ,
+which can be defined to make index entries,
+or whatever.
+.TL
+.b .$C
+.i K
+.i N
+.i T
+.DE
+This macro is called by
+.b .$c .
+It is normally undefined,
+but can be used to automatically insert
+index entries,
+or whatever.
+.i K
+is a keyword,
+either
+.q Chapter
+or
+.q Appendix
+(depending on the
+.b .++
+mode);
+.i N
+is the chapter or appendix number,
+and
+.i T
+is the chapter or appendix title.
+.TL
+.b .ac
+.i A
+.i N
+.DE
+This macro
+(short for
+.b .acm )
+sets up the \*N environment
+for camera-ready papers
+as used by the ACM.
+This format is 25% larger,
+and has no headers or footers.
+The author's name
+.i A
+is printed at the bottom of the page
+(but off the part which will be printed
+in the conference proceedings),
+together with the current page number
+and the total number of pages
+.i N .
+Additionally,
+this macro loads the file
+.b /usr/lib/me/acm.me ,
+which may later be augmented with other macros
+useful for printing papers
+for ACM conferences.
+It should be noted
+that this macro will not work correctly in version 7 \*T,
+since it sets the page length
+wider than the physical width
+of the C/A/T phototypesetter roll.
+.sh 1 "Predefined Strings"
+.TL
+.ST *
+.DE
+Footnote number, actually
+.ST [ \c
+.NR ($f \c
+.ST ] .
+This string is incremented
+after each call to
+.b .)f .
+.TL
+.ST #
+.DE
+Delayed text number.
+Actually
+[\c
+.NR ($d ].
+.TL
+.ST [
+.DE
+Superscript.
+This string gives upward movement
+and a change to a smaller point size
+if possible,
+otherwise it gives the left bracket character
+(`\^\c
+.b [ \^').
+Extra space is left above the line
+to allow room for the superscript.
+.TL
+.ST ]
+.DE
+Unsuperscript.
+Inverse to
+.ST [ .
+For example,
+to produce a superscript
+you might type
+.b x \c
+.ST [ \c
+.b 2 \c
+.ST ] ,
+which will produce
+.b x\*[2\*] .
+.TL
+.ST <
+.DE
+Subscript.
+Defaults to
+`\^<\^'
+if half-carriage motion not possible.
+Extra space is left below the line
+to allow for the subscript.
+.TL
+.ST >
+.DE
+Inverse to
+.ST < .
+.TL
+.ST (dw
+.DE
+The day of the week,
+as a word.
+.TL
+.ST (mo
+.DE
+The month,
+as a word.
+.TL
+.ST (td
+.DE
+Today's date,
+directly printable.
+The date is of the form \*(td.
+Other forms of the date can be used
+by using
+.NR (dy
+(the day of the month;
+for example, \n(dy),
+.ST (mo
+(as noted above)
+or
+.NR (mo
+(the same,
+but as an ordinal number;
+for example, \*(mo is \n(mo),
+and
+.NR (yr
+(the last two digits of the current year).
+.TL
+.ST (lq
+.DE
+Left quote marks.
+Double quote in \*N.
+.TL
+.ST (rq
+.DE
+Right quote.
+.TL
+.ST \-
+.DE
+.ie \w'\(34'>0 \(34
+.el 3/4
+em dash in \*T;
+two hyphens in \*N.
+.sh 1 "Special Characters and Marks"
+.pp
+There are a number of special characters
+and diacritical marks
+(such as accents)
+available through \-me.
+To reference these characters,
+you must call the macro
+.b .sc
+to define the characters before using them.
+.TL
+.b .sc
+.DE
+Define special characters and diacritical marks, as described
+in the remainder of this section.
+This macro must be stated
+before initialization.
+The special characters available
+are listed below.
+.in +4n
+.ta 15 +5 +6
+.nf
+Name Usage Example
+Acute accent \e*\(aa a\e*\(aa a\*'
+Grave accent \e*\(ga e\e*\(ga e\*`
+Umlaut	\e*:	u\e*:	u\*:
+Tilde \e*~ n\e*~ n\*~
+Caret \e*^ e\e*^ e\*^
+Cedilla \e*, c\e*, c\*,
+Czech \e*v e\e*v e\*v
+Circle \e*o A\e*o A\*o
+There exists \e*(qe \*(qe
+For all \e*(qa \*(qa
+.fi
+.sp 1i
+.in 0
+.b Acknowledgments
+.pp
+I would like to thank
+Bob Epstein,
+Bill Joy,
+and Larry Rowe
+for having the courage
+to use the \-me macros
+to produce non-trivial papers
+during the development stages;
+Ricki Blau,
+Pamela Humphrey,
+and Jim Joyce
+for their help with the documentation phase;
+peter kessler
+for numerous complaints,
+most accompanied by fixes;
+and the plethora of people who have contributed ideas
+and have given support for the project.
+.bp
+.b Summary
+.pp
+This alphabetical list summarizes all macros, strings, and number registers
+available in the \-me macros.
+Selected
+.i troff
+commands, registers, and functions are included as well;
+those listed can generally be used with impunity.
+.pp
+The columns are the name of the
+command, macro, register, or string;
+the type of the object,
+and the description.
+Types are
+.b M
+for macro or builtin command
+(invoked with
+.b \&.
+or
+.b \&\'
+in the first input column),
+.b S
+for a string
+(invoked with
+.b \e*
+or
+.b \e*( ),
+.b R
+for a number register
+(invoked with
+.b \en
+or
+.b \en( ),
+and
+.b F
+for a
+.i troff
+builtin function
+(invoked by preceding it with a single backslash).
+.pp
+Lines marked with \(sc are
+.i troff
+internal codes.
+Lines marked with \(dg or \(dd
+may be defined by the user to get special functions;
+\(dd indicates that these are defined by default
+and changing them may have unexpected side effects.
+Lines marked with \(de
+are specific to
+.i ditroff
+(device-independent
+.i troff ).
+.de $H
+.ev 1
+.ta \w'\e(space)\(sc\ 'u +\w'TYPE 'u
+NAME TYPE DESCRIPTION
+.ev
+..
+.(l
+.$H
+\e(space) F\(sc unpaddable space
+\e" F\(sc comment (to end of line)
+\e*# S optional delayed text tag string
+\e$\fI\&N\fP F\(sc interpolate argument \fI\&N\fP
+\en($0 R section depth
+\&.$0 M\(dg invoked after section title printed
+\en($1 R first section number
+\&.$1 M\(dg invoked before printing depth 1 section
+\en($2 R second section number
+\&.$2 M\(dg invoked before printing depth 2 section
+\en($3 R third section number
+\&.$3 M\(dg invoked before printing depth 3 section
+\en($4 R fourth section number
+\&.$4 M\(dg invoked before printing depth 4 section
+\en($5 R fifth section number
+\&.$5 M\(dg invoked before printing depth 5 section
+\en($6 R sixth section number
+\&.$6 M\(dg invoked before printing depth 6 section
+\&.$C M\(dg called at beginning of chapter
+\&.$H M\(dg text header
+\en($R R\(dd relative vertical spacing in displays
+\en($c R current column number
+\&.$c M\(dd print chapter title
+\en($d R delayed text number
+\en($f R footnote number
+\&.$f M\(dd print footer
+\&.$h M\(dd print header
+\en($i R paragraph base indent
+\en($l R column width
+\en($m R number of columns in effect
+\e*($n S section name
+\en($p R numbered paragraph number
+\&.$p M\(dd print section heading (internal macro)
+\en($r R\(dd relative vertical spacing in text
+\en($s R column indent
+\&.$s M\(dd footnote separator (from text)
+\en% R\(sc current page number
+\e& F\(sc zero width character, useful for hiding controls
+\e(\fI\&xx\fP F\(sc interpolate special character \fI\&xx\fP
+\&.(b M begin block
+\&.(c M begin centered block
+\&.(d M begin delayed text
+\&.(f M begin footnote
+\&.(l M begin list
+\&.(q M begin quote
+\&.(x M begin index entry
+\&.(z M begin floating keep
+\&.)b M end block
+\&.)c M end centered block
+\&.)d M end delayed text
+\&.)f M end footnote
+\&.)l M end list
+\&.)q M end quote
+\&.)x M end index entry
+\&.)z M end floating keep
+\e*\fI\&x\fP F\(sc interpolate string \fI\&x\fP
+\e*(\fI\&xx\fP F\(sc interpolate string \fI\&xx\fP
+\e** S optional footnote tag string
+\&.++ M set paper section type
+\&.+c M begin chapter
+\e*, S cedilla
+\e\- F\(sc minus sign
+\e*\- S 3/4 em dash
+\e0 F\(sc unpaddable digit-width space
+\&.1c M revert to single column output
+\&.2c M begin two column output
+\e*:	S	umlaut
+\e*< S begin subscript
+\e*> S end subscript
+\&.EN M end equation
+\&.EQ M begin equation
+\eL\'\fI\&d\fP\' F\(sc vertical line drawing function for distance \fI\&d\fP
+\&.GE M\(de end \fIgremlin\fP picture
+\&.GF M\(de end \fIgremlin\fP picture (with flyback)
+\&.GS M\(de start \fIgremlin\fP picture
+\&.IE M\(de end \fIideal\fP picture
+\&.IF M\(de end \fIideal\fP picture (with flyback)
+\&.IS M\(de start \fIideal\fP picture
+\&.PE M\(de end \fIpic\fP picture
+\&.PF M\(de end \fIpic\fP picture (with flyback)
+\&.PS M\(de start \fIpic\fP picture
+\&.TE M end table
+\&.TH M end header of table
+\&.TS M begin table
+\e*[ S begin superscript
+\en(\&.$ R\(sc number of arguments to macro
+\en(\&.i R\(sc current indent
+\en(\&.l R\(sc current line length
+\en(\&.s R\(sc current point size
+\e*(\&\' S acute accent
+\e*(\&\` S grave accent
+\e(\' F\(sc acute accent
+\e(\` F\(sc grave accent
+\e*] S end superscript
+\e^ F\(sc 1/12 em narrow space
+\e*^ S caret
+\&.ac M ACM mode
+\&.ad M\(sc set text adjustment
+\&.af M\(sc assign format to register
+\&.am M\(sc append to macro
+\&.ar M set page numbers in Arabic
+\&.as M\(sc append to string
+\&.b M bold font
+\&.ba M set base indent
+\&.bc M begin new column
+\&.bi M bold italic
+\en(bi R display (block) indent
+\&.bl M blank lines (even at top of page)
+\en(bm R bottom title margin
+\&.bp M\(sc begin page
+\&.br M\(sc break (start new line)
+\en(bs R display (block) pre/post spacing
+\en(bt R block keep threshold
+\&.bx M boxed
+\ec F\(sc continue input
+\&.ce M\(sc center lines
+\en(ch R current chapter number
+\&.de M\(sc define macro
+\en(df R display font
+\&.ds M\(sc define string
+\en(dw R\(sc current day of week
+\e*(dw S current day of week
+\en(dy R\(sc day of month
+\ee F\(sc printable version of \e
+\&.ef M set footer (even numbered pages only)
+\&.eh M set header (even numbered pages only)
+\&.el M\(sc else part of conditional
+\&.ep M end page
+\en(es R equation pre/post space
+\ef\fI\&f\fP F\(sc inline font change to font \fI\&f\fP
+\ef(\fI\&ff\fP F\(sc inline font change to font \fI\&ff\fP
+\&.fc M\(sc set field characters
+\en(ff R footnote font
+\&.fi M\(sc fill output lines
+\en(fi R footnote indent (first line only)
+\en(fm R footer margin
+\&.fo M set footer
+\en(fp R footnote pointsize
+\en(fs R footnote prespace
+\en(fu R footnote undent (from right margin)
+\eh\'\fI\&d\fP\' F\(sc local horizontal motion for distance \fI\&d\fP
+\&.hc M\(sc set hyphenation character
+\&.he M set header
+\&.hl M draw horizontal line
+\en(hm R header margin
+\&.hx M suppress headers and footers on next page
+\&.hy M\(sc set hyphenation mode
+\&.i M italic font
+\&.ie M\(sc conditional with else
+\&.if M\(sc conditional
+\en(ii R indented paragraph indent
+\&.in M\(sc indent (transient, use .ba for pervasive)
+\&.ip M begin indented paragraph
+\&.ix M indent, no break
+\el\'\fI\&d\fP\' F\(sc horizontal line drawing function for distance \fI\&d\fP
+\&.lc M\(sc set leader repetition character
+\&.lh M\(de interpolate local letterhead
+\&.ll M set line length
+\&.lo M load local macros
+\&.lp M begin left justified paragraph
+\e*(lq S left quote marks
+\&.ls M\(sc set multi-line spacing
+\&.m1 M set space from top of page to header
+\&.m2 M set space from header to text
+\&.m3 M set space from text to footer
+\&.m4 M set space from footer to bottom of page
+\&.mc M\(sc insert margin character
+\&.mk M\(sc mark vertical position
+\en(mo R\(sc month of year
+\e*(mo S current month
+\en\fI\&x\fP F\(sc interpolate number register \fI\&x\fP
+\en(\fI\&xx\fP F\(sc interpolate number register \fI\&xx\fP
+\&.n1 M number lines in margin
+\&.n2 M number lines in margin
+\&.na M\(sc turn off text adjustment
+\&.ne M\(sc need vertical space
+\&.nf M\(sc don't fill output lines
+\&.nh M\(sc turn off hyphenation
+\&.np M begin numbered paragraph
+\&.nr M\(sc set number register
+\&.ns M\(sc no space mode
+\e*o S circle (e.g., for Norse A\*o)
+\&.of M set footer (odd numbered pages only)
+\&.oh M set header (odd numbered pages only)
+\&.pa M begin page
+\&.pd M print delayed text
+\en(pf R paragraph font
+\en(pi R paragraph indent
+\&.pl M\(sc set page length
+\&.pn M\(sc set next page number
+\&.po M\(sc page offset
+\en(po R simulated page offset
+\&.pp M begin paragraph
+\en(pp R paragraph pointsize
+\en(ps R paragraph prespace
+\&.q M quoted
+\e*(qa S for all
+\e*(qe S there exists
+\en(qi R quote indent (also shortens line)
+\en(qp R quote pointsize
+\en(qs R quote pre/post space
+\&.r M roman font
+\&.rb M real bold font
+\&.re M reset tabs
+\&.rm M\(sc remove macro or string
+\&.rn M\(sc rename macro or string
+\&.ro M set page numbers in roman
+\e*(rq S right quote marks
+\&.rr M\(sc remove register
+\&.rs M\(sc restore spacing
+\&.rt M\(sc return to vertical position
+\es\fI\&S\fP F\(sc inline size change to size \fI\&S\fP
+\&.sc M load special characters
+\en(sf R section title font
+\&.sh M begin numbered section
+\en(si R relative base indent per section depth
+\&.sk M skip next page
+\&.sm M set argument in a smaller pointsize
+\&.so M\(sc source input file
+\en(so R additional section title offset
+\&.sp M\(sc vertical space
+\en(sp R section title pointsize
+\en(ss R section prespace
+\&.sx M change section depth
+\&.sz M set pointsize and vertical spacing
+\&.ta M\(sc set tab stops
+\&.tc M\(sc set tab repetition character
+\e*(td S today's date
+\en(tf R title font
+\&.th M set thesis mode
+\&.ti M\(sc temporary indent (next line only)
+\&.tl M\(sc three part title
+\en(tm R top title margin
+\&.tp M begin title page
+\en(tp R title pointsize
+\&.tr M\(sc translate
+\&.u M underlined
+\&.uh M unnumbered section
+\&.ul M\(sc underline next line
+\ev\'\fI\&d\fP\' F\(sc local vertical motion for distance \fI\&d\fP
+\e*v	S	inverted `v' for Czech ``e\*v''
+\ew\'\fI\&S\fP\' F\(sc return width of string \fI\&S\fP
+\&.xl M set line length (local)
+\&.xp M print index
+\en(xs R index entry prespace
+\en(xu R index undent (from right margin)
+\en(yr R\(sc year (last two digits only)
+\en(zs R floating keep pre/post space
+\e{ F\(sc begin conditional group
+\e| F\(sc 1/6 em narrow space
+\e} F\(sc end conditional group
+\e*~ S tilde
+.)l
+.rm $H
diff --git a/share/doc/usd/Makefile b/share/doc/usd/Makefile
new file mode 100644
index 0000000..8d68c92
--- /dev/null
+++ b/share/doc/usd/Makefile
@@ -0,0 +1,24 @@
+# @(#)Makefile 8.2 (Berkeley) 4/20/94
+
+# The following modules do not build/install:
+# 08.mh
+
+BINDIR= /usr/share/doc/usd
+FILES= 00.contents Makefile Title
+SUBDIR= 01.begin 02.learn 03.shell 04.csh 05.dc 06.bc 07.mail 09.edtut \
+ 10.edadv 11.vitut 12.exref 13.viref 14.jove 15.sed 16.awk 17.msmacros \
+ 18.msdiffs 19.memacros 20.meref 21.troff 22.trofftut 23.eqn \
+ 24.eqnguide 25.tbl 26.refer 27.invert 28.bib 29.diction 30.rogue \
+ 31.trek
+
+Title.ps: ${FILES}
+ groff Title > ${.TARGET}
+
+contents.ps: ${FILES}
+ groff -ms 00.contents > ${.TARGET}
+
+beforeinstall:
+ install -c -o ${BINOWN} -g ${BINGRP} -m 444 ${FILES} \
+ ${DESTDIR}${BINDIR}
+
+.include <bsd.subdir.mk>
diff --git a/share/doc/usd/Title b/share/doc/usd/Title
new file mode 100644
index 0000000..d36a55f
--- /dev/null
+++ b/share/doc/usd/Title
@@ -0,0 +1,120 @@
+.\" Copyright (c) 1986, 1993 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)Title 8.2 (Berkeley) 4/19/94
+.\"
+.ps 18
+.vs 22
+.sp 2.75i
+.ft B
+.ce 2
+UNIX User's Supplementary Documents
+(USD)
+.ps 14
+.vs 16
+.sp |4i
+.ce 2
+4.4 Berkeley Software Distribution
+.sp |5.75i
+.ft R
+.ps 12
+.vs 16
+.ce
+June, 1993
+.sp |8.2i
+.ce 5
+Computer Systems Research Group
+Computer Science Division
+Department of Electrical Engineering and Computer Science
+University of California
+Berkeley, California 94720
+.bp
+\&
+.sp |1i
+.hy 0
+.ps 10
+.vs 12p
+Copyright 1979, 1980, 1983, 1986, 1993
+The Regents of the University of California. All rights reserved.
+.sp 2
+Other than the specific documents listed below as copyrighted by AT&T,
+redistribution and use of this manual in source and binary forms,
+with or without modification, are permitted provided that the
+following conditions are met:
+.sp 0.5
+.in +0.2i
+.ta 0.2i
+.ti -0.2i
+1) Redistributions of this manual must retain the copyright
+notices on this page, this list of conditions and the following disclaimer.
+.ti -0.2i
+2) Software or documentation that incorporates part of this manual must
+reproduce the copyright notices on this page, this list of conditions and
+the following disclaimer in the documentation and/or other materials
+provided with the distribution.
+.ti -0.2i
+3) All advertising materials mentioning features or use of this software
+must display the following acknowledgement:
+``This product includes software developed by the University of
+California, Berkeley and its contributors.''
+.ti -0.2i
+4) Neither the name of the University nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+.in -0.2i
+.sp
+\fB\s-1THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.\s+1\fP
+.sp 2
+Documents USD:1, 2, 3, 5, 6, 9, 10, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27,
+and 29 are copyright 1979, AT&T Bell Laboratories, Incorporated.
+Holders of \x'-1p'UNIX\v'-4p'\s-3TM\s0\v'4p'/32V,
+System III, or System V software licenses are
+permitted to copy these documents, or any portion of them,
+as necessary for licensed use of the software,
+provided this copyright notice and statement of permission
+are included.
+.sp 2
+Documents USD:8, 14, and 28 are part of the
+user contributed software.
+.sp 2
+The views and conclusions contained in this manual are those of the
+authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of the University of California.
OpenPOWER on IntegriCloud