diff options
Diffstat (limited to 'contrib/perl5/pod')
75 files changed, 29944 insertions, 11567 deletions
diff --git a/contrib/perl5/pod/Makefile b/contrib/perl5/pod/Makefile index eb3fcfe..bd2e148 100644 --- a/contrib/perl5/pod/Makefile +++ b/contrib/perl5/pod/Makefile @@ -1,4 +1,5 @@ -CONVERTERS = pod2html pod2latex pod2man pod2text checkpods +CONVERTERS = pod2html pod2latex pod2man pod2text checkpods \ + pod2usage podchecker podselect HTMLROOT = / # Change this to fix cross-references in HTML POD2HTML = pod2html \ @@ -17,6 +18,7 @@ POD = \ perl.pod \ perldelta.pod \ perl5004delta.pod \ + perl5005delta.pod \ perldata.pod \ perlsyn.pod \ perlop.pod \ @@ -29,19 +31,25 @@ POD = \ perlmod.pod \ perlmodlib.pod \ perlmodinstall.pod \ + perlfork.pod \ perlform.pod \ perllocale.pod \ perlref.pod \ perlreftut.pod \ perldsc.pod \ perllol.pod \ + perlboot.pod \ perltoot.pod \ + perltootc.pod \ perlobj.pod \ perltie.pod \ perlbot.pod \ perlipc.pod \ perlthrtut.pod \ + perldbmfilter.pod \ + perldebguts.pod \ perldebug.pod \ + perlnumber.pod \ perldiag.pod \ perlsec.pod \ perltrap.pod \ @@ -55,6 +63,11 @@ POD = \ perlxstut.pod \ perlguts.pod \ perlcall.pod \ + perlcompile.pod \ + perltodo.pod \ + perlapi.pod \ + perlintern.pod \ + perlhack.pod \ perlhist.pod \ perlfaq.pod \ perlfaq1.pod \ @@ -72,6 +85,7 @@ MAN = \ perl.man \ perldelta.man \ perl5004delta.man \ + perl5005delta.man \ perldata.man \ perlsyn.man \ perlop.man \ @@ -84,19 +98,25 @@ MAN = \ perlmod.man \ perlmodlib.man \ perlmodinstall.man \ + perlfork.man \ perlform.man \ perllocale.man \ perlref.man \ perlreftut.man \ perldsc.man \ perllol.man \ + perlboot.man \ perltoot.man \ + perltootc.man \ perlobj.man \ perltie.man \ perlbot.man \ perlipc.man \ perlthrtut.man \ + perldbmfilter.man \ + perldebguts.man \ perldebug.man \ + perlnumber.man \ perldiag.man \ perlsec.man \ perltrap.man \ @@ -110,6 +130,11 @@ MAN = \ perlxstut.man \ perlguts.man \ perlcall.man \ + perlcompile.man \ + perltodo.man \ + perlapi.man \ + perlintern.man \ + perlhack.man \ perlhist.man \ perlfaq.man \ perlfaq1.man \ @@ -127,6 +152,7 @@ HTML = \ 
perl.html \ perldelta.html \ perl5004delta.html \ + perl5005delta.html \ perldata.html \ perlsyn.html \ perlop.html \ @@ -139,19 +165,25 @@ HTML = \ perlmod.html \ perlmodlib.html \ perlmodinstall.html \ + perlfork.html \ perlform.html \ perllocale.html \ perlref.html \ perlreftut.html \ perldsc.html \ perllol.html \ + perlboot.html \ perltoot.html \ + perltootc.html \ perlobj.html \ perltie.html \ perlbot.html \ perlipc.html \ perlthrtut.html \ + perldbmfilter.html \ + perldebguts.html \ perldebug.html \ + perlnumber.html \ perldiag.html \ perlsec.html \ perltrap.html \ @@ -165,6 +197,11 @@ HTML = \ perlxstut.html \ perlguts.html \ perlcall.html \ + perlcompile.html \ + perltodo.html \ + perlapi.html \ + perlintern.html \ + perlhack.html \ perlhist.html \ perlfaq.html \ perlfaq1.html \ @@ -182,6 +219,7 @@ TEX = \ perl.tex \ perldelta.tex \ perl5004delta.tex \ + perl5005delta.tex \ perldata.tex \ perlsyn.tex \ perlop.tex \ @@ -194,20 +232,25 @@ TEX = \ perlmod.tex \ perlmodlib.tex \ perlmodinstall.tex \ + perlfork.tex \ perlform.tex \ perllocale.tex \ perlref.tex \ perlreftut.tex \ - perlopentut.tex \ perldsc.tex \ perllol.tex \ + perlboot.tex \ perltoot.tex \ + perltootc.tex \ perlobj.tex \ perltie.tex \ perlbot.tex \ perlipc.tex \ perlthrtut.tex \ + perldbmfilter.tex \ + perldebguts.tex \ perldebug.tex \ + perlnumber.tex \ perldiag.tex \ perlsec.tex \ perltrap.tex \ @@ -221,6 +264,11 @@ TEX = \ perlxstut.tex \ perlguts.tex \ perlcall.tex \ + perlcompile.tex \ + perltodo.tex \ + perlapi.tex \ + perlintern.tex \ + perlhack.tex \ perlhist.tex \ perlfaq.tex \ perlfaq1.tex \ @@ -301,6 +349,15 @@ pod2text: pod2text.PL ../lib/Config.pm checkpods: checkpods.PL ../lib/Config.pm $(PERL) -I ../lib checkpods.PL +pod2usage: pod2usage.PL ../lib/Config.pm + $(PERL) -I ../lib pod2usage.PL + +podchecker: podchecker.PL ../lib/Config.pm + $(PERL) -I ../lib podchecker.PL + +podselect: podselect.PL ../lib/Config.pm + $(PERL) -I ../lib podselect.PL + compile: all $(REALPERL) -I../lib 
../utils/perlcc -regex 's/$$/.exe/' pod2latex pod2man pod2text checkpods -prog -verbose dcf -log ../compilelog; diff --git a/contrib/perl5/pod/Win32.pod b/contrib/perl5/pod/Win32.pod new file mode 100644 index 0000000..44ed3d1 --- /dev/null +++ b/contrib/perl5/pod/Win32.pod @@ -0,0 +1,284 @@ +=head1 NAME + +Win32 - Interfaces to some Win32 API Functions + +=head1 DESCRIPTION + +Perl on Win32 contains several functions to access Win32 APIs. Some +are included in Perl itself (on Win32) and some are only available +after explicitly requesting the Win32 module with: + + use Win32; + +The builtin functions are marked as [CORE] and the other ones +as [EXT] in the following alphabetical listing. The C<Win32> module +is not part of the Perl source distribution; it is distributed in +the libwin32 bundle of Win32::* modules on CPAN. The module is +already preinstalled in binary distributions like ActivePerl. + +=head2 Alphabetical Listing of Win32 Functions + +=over + +=item Win32::AbortSystemShutdown(MACHINE) + +[EXT] Aborts a system shutdown (started by the +InitiateSystemShutdown function) on the specified MACHINE. + +=item Win32::BuildNumber() + +[CORE] Returns the ActivePerl build number. This function is +only available in the ActivePerl binary distribution. + +=item Win32::CopyFile(FROM, TO, OVERWRITE) + +[CORE] The Win32::CopyFile() function copies an existing file to a new +file. All file information like creation time and file attributes will +be copied to the new file. However it will B<not> copy the security +information. If the destination file already exists it will only be +overwritten when the OVERWRITE parameter is true. But even this will +not overwrite a read-only file; you have to unlink() it first +yourself. + +=item Win32::DomainName() + +[CORE] Returns the name of the Microsoft Network domain that the +owner of the current perl process is logged into. 
+ +=item Win32::ExpandEnvironmentStrings(STRING) + +[EXT] Takes STRING and replaces all referenced environment variable +names with their defined values. References to environment variables +take the form C<%VariableName%>. Case is ignored when looking up the +VariableName in the environment. If the variable is not found then the +original C<%VariableName%> text is retained. Has the same effect +as the following: + + $string =~ s/%([^%]*)%/$ENV{$1} || "%$1%"/eg + +=item Win32::FormatMessage(ERRORCODE) + +[CORE] Converts the supplied Win32 error number (e.g. returned by +Win32::GetLastError()) to a descriptive string. Analogous to the +perror() standard-C library function. Note that C<$^E> used +in a string context has much the same effect. + + C:\> perl -e "$^E = 26; print $^E;" + The specified disk or diskette cannot be accessed + +=item Win32::FsType() + +[CORE] Returns the name of the filesystem of the currently active +drive (like 'FAT' or 'NTFS'). In list context it returns three values: +(FSTYPE, FLAGS, MAXCOMPLEN). FSTYPE is the filesystem type as +before. FLAGS is a combination of values of the following table: + + 0x00000001 supports case-sensitive filenames + 0x00000002 preserves the case of filenames + 0x00000004 supports Unicode in filenames + 0x00000008 preserves and enforces ACLs + 0x00000010 supports file-based compression + 0x00000020 supports disk quotas + 0x00000040 supports sparse files + 0x00000080 supports reparse points + 0x00000100 supports remote storage + 0x00008000 is a compressed volume (e.g. DoubleSpace) + 0x00010000 supports object identifiers + 0x00020000 supports the Encrypted File System (EFS) + +MAXCOMPLEN is the maximum length of a filename component (the part +between two backslashes) on this file system. + +=item Win32::FreeLibrary(HANDLE) + +[EXT] Unloads a previously loaded dynamic-link library. The HANDLE is +no longer valid after this call. 
See L<LoadLibrary|Win32::LoadLibrary(LIBNAME)> +for information on dynamically loading a library. + +=item Win32::GetArchName() + +[EXT] Use of this function is deprecated. It is equivalent to +$ENV{PROCESSOR_ARCHITECTURE}. This might not work on Win9X. + +=item Win32::GetChipName() + +[EXT] Returns the processor type: 386, 486 or 586 for Intel processors, +21064 for the Alpha chip. + +=item Win32::GetCwd() + +[CORE] Returns the current active drive and directory. This function +does not return a UNC path, since the functionality required for such +a feature is not available under Windows 95. + +=item Win32::GetFullPathName(FILENAME) + +[CORE] GetFullPathName combines the FILENAME with the current drive +and directory name and returns a fully qualified (aka, absolute) +path name. In list context it returns two elements: (PATH, FILE) where +PATH is the complete pathname component (including trailing backslash) +and FILE is just the filename part. Note that no attempt is made to +convert 8.3 components in the supplied FILENAME to longnames or +vice-versa. Compare with Win32::GetShortPathName and +Win32::GetLongPathName. + +This function has been added for Perl 5.6. + +=item Win32::GetLastError() + +[CORE] Returns the last error value generated by a call to a Win32 API +function. Note that C<$^E> used in a numeric context amounts to the +same value. + +=item Win32::GetLongPathName(PATHNAME) + +[CORE] Returns a representation of PATHNAME composed of longname +components (if any). The result may not necessarily be longer +than PATHNAME. No attempt is made to convert PATHNAME to the +absolute path. Compare with Win32::GetShortPathName and +Win32::GetFullPathName. + +This function has been added for Perl 5.6. + +=item Win32::GetNextAvailDrive() + +[CORE] Returns a string in the form of "<d>:" where <d> is the first +available drive letter. 
+ +=item Win32::GetOSVersion() + +[CORE] Returns the array (STRING, MAJOR, MINOR, BUILD, ID), where +the elements are, respectively: An arbitrary descriptive string, the +major version number of the operating system, the minor version +number, the build number, and a digit indicating the actual operating +system. For ID, the values are 0 for Win32s, 1 for Windows 9X and 2 +for Windows NT. In scalar context it returns just the ID. + +=item Win32::GetShortPathName(PATHNAME) + +[CORE] Returns a representation of PATHNAME composed only of +short (8.3) path components. The result may not necessarily be +shorter than PATHNAME. Compare with Win32::GetFullPathName and +Win32::GetLongPathName. + +=item Win32::GetProcAddress(INSTANCE, PROCNAME) + +[EXT] Returns the address of a function inside a loaded library. The +information about what you can do with this address has been lost in +the mist of time. Use the Win32::API module instead of this deprecated +function. + +=item Win32::GetTickCount() + +[CORE] Returns the number of milliseconds elapsed since the last +system boot. Resolution is limited to system timer ticks (about 10ms +on WinNT and 55ms on Win9X). + +=item Win32::InitiateSystemShutdown(MACHINE, MESSAGE, TIMEOUT, FORCECLOSE, REBOOT) + +[EXT] Shuts down the specified MACHINE, notifying users with the +supplied MESSAGE, within the specified TIMEOUT interval. Forces +closing of all documents without prompting the user if FORCECLOSE is +true, and reboots the machine if REBOOT is true. This function works +only on WinNT. + +=item Win32::IsWinNT() + +[CORE] Returns non zero if the Win32 subsystem is Windows NT. + +=item Win32::IsWin95() + +[CORE] Returns non zero if the Win32 subsystem is Windows 95. + +=item Win32::LoadLibrary(LIBNAME) + +[EXT] Loads a dynamic link library into memory and returns its module +handle. This handle can be used with Win32::GetProcAddress and +Win32::FreeLibrary. This function is deprecated. Use the Win32::API +module instead. 
+ +=item Win32::LoginName() + +[CORE] Returns the username of the owner of the current perl process. + +=item Win32::LookupAccountName(SYSTEM, ACCOUNT, DOMAIN, SID, SIDTYPE) + +[EXT] Looks up ACCOUNT on SYSTEM and returns the domain name, the SID, and +the SID type. + +=item Win32::LookupAccountSID(SYSTEM, SID, ACCOUNT, DOMAIN, SIDTYPE) + +[EXT] Looks up SID on SYSTEM and returns the account name, domain name, +and the SID type. + +=item Win32::MsgBox(MESSAGE [, FLAGS [, TITLE]]) + +[EXT] Creates a dialog box containing MESSAGE. FLAGS specifies the +required icon and buttons according to the following table: + + 0 = OK + 1 = OK and Cancel + 2 = Abort, Retry, and Ignore + 3 = Yes, No and Cancel + 4 = Yes and No + 5 = Retry and Cancel + + MB_ICONSTOP "X" in a red circle + MB_ICONQUESTION question mark in a bubble + MB_ICONEXCLAMATION exclamation mark in a yellow triangle + MB_ICONINFORMATION "i" in a bubble + +TITLE specifies an optional window title. The default is "Perl". + +The function returns the menu id of the selected push button: + + 0 Error + + 1 OK + 2 Cancel + 3 Abort + 4 Retry + 5 Ignore + 6 Yes + 7 No + +=item Win32::NodeName() + +[CORE] Returns the Microsoft Network node-name of the current machine. + +=item Win32::RegisterServer(LIBRARYNAME) + +[EXT] Loads the DLL LIBRARYNAME and calls the function DllRegisterServer. + +=item Win32::SetCwd(NEWDIRECTORY) + +[CORE] Sets the current active drive and directory. This function does not +work with UNC paths, since the functionality required for +such a feature is not available under Windows 95. + +=item Win32::SetLastError(ERROR) + +[CORE] Sets the value of the last error encountered to ERROR. This is +the value that will be returned by the Win32::GetLastError() +function. This function has been added for Perl 5.6. + +=item Win32::Sleep(TIME) + +[CORE] Pauses for TIME milliseconds. The timeslices are made available +to other processes and threads. 
+ +=item Win32::Spawn(COMMAND, ARGS, PID) + +[CORE] Spawns a new process using the supplied COMMAND, passing in +arguments in the string ARGS. The pid of the new process is stored in +PID. This function is deprecated. Please use the Win32::Process module +instead. + +=item Win32::UnregisterServer(LIBRARYNAME) + +[EXT] Loads the DLL LIBRARYNAME and calls the function +DllUnregisterServer. + +=back + +=cut diff --git a/contrib/perl5/pod/buildtoc b/contrib/perl5/pod/buildtoc index a4b9d5a..21fee31 100644 --- a/contrib/perl5/pod/buildtoc +++ b/contrib/perl5/pod/buildtoc @@ -6,13 +6,15 @@ sub output ($); @pods = qw( perl perlfaq perlfaq1 perlfaq2 perlfaq3 perlfaq4 perlfaq5 - perlfaq6 perlfaq7 perlfaq8 perlfaq9 perldelta perldata perlopentut - perlsyn perlop perlre perlreftut perlrun perlfunc perlvar perlsub - perlmod perlmodlib perlmodinstall perlform perllocale perlref perldsc - perllol perltoot perlobj perltie perlthrtut perlbot perlipc perldebug + perlfaq6 perlfaq7 perlfaq8 perlfaq9 perldelta perldata + perlsyn perlop perlre perlrun perlfunc perlvar perlsub + perlmod perlmodlib perlmodinstall perlfork perlform perllocale + perlref perlreftut perldsc + perllol perlboot perltoot perltootc perlobj perltie perlbot perlipc + perldbmfilter perldebug perlnumber perldebguts perldiag perlsec perltrap perlport perlstyle perlpod perlbook - perlembed perlapio perlxs perlxstut perlguts perlcall - perlhist + perlembed perlapio perlxs perlxstut perlguts perlcall perlcompile + perlapi perlintern perlhist ); for (@pods) { s/$/.pod/ } @@ -112,6 +114,8 @@ podset( @modules[ sort { $modname[$a] cmp $modname[$b] } 0 .. $#modules ] ); Here should be listed all the extra programs' documentation, but they don't all have manual pages yet: + =over + =item a2p =item s2p @@ -130,6 +134,7 @@ podset( @modules[ sort { $modname[$a] cmp $modname[$b] } 0 .. 
$#modules ] ); =item wrapsuid + =back =head1 AUTHOR @@ -148,8 +153,7 @@ sub podset { while(<>) { if (s/^=head1 (NAME)\s*/=head2 /) { $pod = path2modname($ARGV); - unitem(); - unhead2(); + unhead1(); output "\n \n\n=head2 "; $_ = <>; if ( /^\s*$pod\b/ ) { @@ -162,7 +166,9 @@ sub podset { next; } if (s/^=head1 (.*)/=item $1/) { - unitem(); unhead2(); + unhead2(); + output "=over\n\n" unless $inhead1; + $inhead1 = 1; output $_; nl(); next; } if (s/^=head2 (.*)/=item $1/) { @@ -170,7 +176,6 @@ sub podset { output "=over\n\n" unless $inhead2; $inhead2 = 1; output $_; nl(); next; - } if (s/^=item ([^=].*)\n/$1/) { next if $pod eq 'perldiag'; @@ -186,6 +191,10 @@ sub podset { s/^-X\b/-I<X>/; output $_; next; } + if (s/^=cut\s*\n//) { + unhead1(); + next; + } } } @@ -198,12 +207,20 @@ sub path2modname { return $_; } +sub unhead1 { + unhead2(); + if ($inhead1) { + output "\n\n=back\n\n"; + } + $inhead1 = 0; +} + sub unhead2 { + unitem(); if ($inhead2) { output "\n\n=back\n\n"; } $inhead2 = 0; - $initem = 0; } sub unitem { diff --git a/contrib/perl5/pod/perl.pod b/contrib/perl5/pod/perl.pod index 6e218cd..59ca0e0 100644 --- a/contrib/perl5/pod/perl.pod +++ b/contrib/perl5/pod/perl.pod @@ -4,22 +4,20 @@ perl - Practical Extraction and Report Language =head1 SYNOPSIS -B<perl> S<[ B<-sTuU> ]> - S<[ B<-hv> ] [ B<-V>[:I<configvar>] ]> - S<[ B<-cw> ] [ B<-d>[:I<debugger>] ] [ B<-D>[I<number/list>] ]> - S<[ B<-pna> ] [ B<-F>I<pattern> ] [ B<-l>[I<octal>] ] [ B<-0>[I<octal>] ]> - S<[ B<-I>I<dir> ] [ B<-m>[B<->]I<module> ] [ B<-M>[B<->]I<'module...'> ]> - S<[ B<-P> ]> - S<[ B<-S> ]> - S<[ B<-x>[I<dir>] ]> - S<[ B<-i>[I<extension>] ]> - S<[ B<-e> I<'command'> ] [ B<--> ] [ I<programfile> ] [ I<argument> ]...> - -For ease of access, the Perl manual has been split up into a number -of sections: +B<perl> S<[ B<-sTuU> ]> S<[ B<-hv> ] [ B<-V>[:I<configvar>] ]> + S<[ B<-cw> ] [ B<-d>[:I<debugger>] ] [ B<-D>[I<number/list>] ]> + S<[ B<-pna> ] [ B<-F>I<pattern> ] [ B<-l>[I<octal>] ] [ 
B<-0>[I<octal>] ]> + S<[ B<-I>I<dir> ] [ B<-m>[B<->]I<module> ] [ B<-M>[B<->]I<'module...'> ]> + S<[ B<-P> ]> S<[ B<-S> ]> S<[ B<-x>[I<dir>] ]> + S<[ B<-i>[I<extension>] ]> S<[ B<-e> I<'command'> ] + [ B<--> ] [ I<programfile> ] [ I<argument> ]...> + +For ease of access, the Perl manual has been split up into several +sections: perl Perl overview (this section) perldelta Perl changes since previous version + perl5005delta Perl changes in version 5.005 perl5004delta Perl changes in version 5.004 perlfaq Perl frequently asked questions perltoc Perl documentation table of contents @@ -37,21 +35,30 @@ of sections: perlmodlib Perl modules: how to write and use perlmodinstall Perl modules: how to install from CPAN perlform Perl formats + perlunicode Perl unicode support perllocale Perl locale support - perlref Perl references perlreftut Perl references short introduction + perlref Perl references, the rest of the story perldsc Perl data structures intro - perllol Perl data structures: lists of lists - perltoot Perl OO tutorial + perllol Perl data structures: arrays of arrays + perlboot Perl OO tutorial for beginners + perltoot Perl OO tutorial, part 1 + perltootc Perl OO tutorial, part 2 perlobj Perl objects perltie Perl objects hidden behind simple variables perlbot Perl OO tricks and examples perlipc Perl interprocess communication + perlfork Perl fork() information perlthrtut Perl threads tutorial + perllexwarn Perl warnings and their control + perlfilter Perl source filters + perldbmfilter Perl DBM filters + perlcompile Perl compiler suite intro perldebug Perl debugging perldiag Perl diagnostic messages + perlnumber Perl number semantics perlsec Perl security perltrap Perl traps for the unwary perlport Perl portability guide @@ -62,18 +69,33 @@ of sections: perlembed Perl ways to embed perl in your C or C++ application perlapio Perl internal IO abstraction interface + perldebguts Perl debugging guts and tips perlxs Perl XS application programming interface perlxstut 
Perl XS tutorial perlguts Perl internal functions for those doing extensions perlcall Perl calling conventions from C + perlapi Perl API listing (autogenerated) + perlintern Perl internal functions (autogenerated) + perltodo Perl things to do + perlhack Perl hackers guide perlhist Perl history records + perlamiga Perl notes for Amiga + perlcygwin Perl notes for Cygwin + perldos Perl notes for DOS + perlhpux Perl notes for HP-UX + perlmachten Perl notes for Power MachTen + perlos2 Perl notes for OS/2 + perlos390 Perl notes for OS/390 + perlvms Perl notes for VMS + perlwin32 Perl notes for Windows + (If you're intending to read these straight through for the first time, the suggested order will tend to reduce the number of forward references.) -By default, all of the above manpages are installed in the -F</usr/local/man/> directory. +By default, the manpages listed above are installed in the +F</usr/local/man/> directory. Extensive additional documentation for Perl modules is available. The default configuration for perl will place this additional documentation @@ -116,17 +138,17 @@ Perl combines (in the author's opinion, anyway) some of the best features of C, B<sed>, B<awk>, and B<sh>, so people familiar with those languages should have little difficulty with it. (Language historians will also note some vestiges of B<csh>, Pascal, and even -BASIC-PLUS.) Expression syntax corresponds quite closely to C +BASIC-PLUS.) Expression syntax corresponds closely to C expression syntax. Unlike most Unix utilities, Perl does not arbitrarily limit the size of your data--if you've got the memory, Perl can slurp in your whole file as a single string. Recursion is of unlimited depth. And the tables used by hashes (sometimes called "associative arrays") grow as necessary to prevent degraded performance. Perl can use sophisticated pattern matching techniques to -scan large amounts of data very quickly. Although optimized for +scan large amounts of data quickly. 
Although optimized for scanning text, Perl can also deal with binary data, and can make dbm files look like hashes. Setuid Perl scripts are safer than C programs -through a dataflow tracing mechanism which prevents many stupid +through a dataflow tracing mechanism that prevents many stupid security holes. If you have a problem that would ordinarily use B<sed> or B<awk> or @@ -137,107 +159,63 @@ scripts into Perl scripts. But wait, there's more... -Perl version 5 is nearly a complete rewrite, and provides -the following additional benefits: - -=over 5 +Begun in 1993 (see L<perlhist>), Perl version 5 is nearly a complete +rewrite that provides the following additional benefits: -=item * Many usability enhancements +=over -It is now possible to write much more readable Perl code (even within -regular expressions). Formerly cryptic variable names can be replaced -by mnemonic identifiers. Error messages are more informative, and the -optional warnings will catch many of the mistakes a novice might make. -This cannot be stressed enough. Whenever you get mysterious behavior, -try the B<-w> switch!!! Whenever you don't get mysterious behavior, -try using B<-w> anyway. +=item * modularity and reusability using innumerable modules -=item * Simplified grammar +Described in L<perlmod>, L<perlmodlib>, and L<perlmodinstall>. -The new yacc grammar is one half the size of the old one. Many of the -arbitrary grammar rules have been regularized. The number of reserved -words has been cut by 2/3. Despite this, nearly all old Perl scripts -will continue to work unchanged. +=item * embeddable and extensible -=item * Lexical scoping +Described in L<perlembed>, L<perlxstut>, L<perlxs>, L<perlcall>, +L<perlguts>, and L<xsubpp>. -Perl variables may now be declared within a lexical scope, like "auto" -variables in C. Not only is this more efficient, but it contributes -to better privacy for "programming in the large". 
Anonymous -subroutines exhibit deep binding of lexical variables (closures). +=item * roll-your-own magic variables (including multiple simultaneous DBM implementations) -=item * Arbitrarily nested data structures +Described in L<perltie> and L<AnyDBM_File>. -Any scalar value, including any array element, may now contain a -reference to any other variable or subroutine. You can easily create -anonymous variables and subroutines. Perl manages your reference -counts for you. +=item * subroutines can now be overridden, autoloaded, and prototyped -=item * Modularity and reusability +Described in L<perlsub>. -The Perl library is now defined in terms of modules which can be easily -shared among various packages. A package may choose to import all or a -portion of a module's published interface. Pragmas (that is, compiler -directives) are defined and used by the same mechanism. +=item * arbitrarily nested data structures and anonymous functions -=item * Object-oriented programming +Described in L<perlreftut>, L<perlref>, L<perldsc>, and L<perllol>. -A package can function as a class. Dynamic multiple inheritance and -virtual methods are supported in a straightforward manner and with very -little new syntax. Filehandles may now be treated as objects. +=item * object-oriented programming -=item * Embeddable and Extensible +Described in L<perlobj>, L<perltoot>, and L<perlbot>. -Perl may now be embedded easily in your C or C++ application, and can -either call or be called by your routines through a documented -interface. The XS preprocessor is provided to make it easy to glue -your C or C++ routines into Perl. Dynamic loading of modules is -supported, and Perl itself can be made into a dynamic library. +=item * compilability into C code or Perl bytecode -=item * POSIX compliant +Described in L<B> and L<B::Bytecode>. -A major new module is the POSIX module, which provides access to all -available POSIX routines and definitions, via object classes where -appropriate. 
+=item * support for light-weight processes (threads) -=item * Package constructors and destructors +Described in L<perlthrtut> and L<Thread>. -The new BEGIN and END blocks provide means to capture control as -a package is being compiled, and after the program exits. As a -degenerate case they work just like awk's BEGIN and END when you -use the B<-p> or B<-n> switches. +=item * support for internationalization, localization, and Unicode -=item * Multiple simultaneous DBM implementations +Described in L<perllocale> and L<utf8>. -A Perl program may now access DBM, NDBM, SDBM, GDBM, and Berkeley DB -files from the same script simultaneously. In fact, the old dbmopen -interface has been generalized to allow any variable to be tied -to an object class which defines its access methods. +=item * lexical scoping -=item * Subroutine definitions may now be autoloaded +Described in L<perlsub>. -In fact, the AUTOLOAD mechanism also allows you to define any arbitrary -semantics for undefined subroutine calls. It's not for just autoloading. +=item * regular expression enhancements -=item * Regular expression enhancements +Described in L<perlre>, with additional examples in L<perlop>. -You can now specify nongreedy quantifiers. You can now do grouping -without creating a backreference. You can now write regular expressions -with embedded whitespace and comments for readability. A consistent -extensibility mechanism has been added that is upwardly compatible with -all old regular expressions. +=item * enhanced debugger and interactive Perl environment, with integrated editor support -=item * Innumerable Unbundled Modules +Described in L<perldebug>. -The Comprehensive Perl Archive Network described in L<perlmodlib> -contains hundreds of plug-and-play modules full of reusable code. -See F<http://www.perl.com/CPAN> for a site near you. +=item * POSIX 1003.1 compliant library -=item * Compilability - -While not yet in full production mode, a working perl-to-C compiler -does exist. 
It can generate portable byte code, simple C, or -optimized C code. +Described in L<POSIX>. =back @@ -245,76 +223,9 @@ Okay, that's I<definitely> enough hype. =head1 AVAILABILITY -Perl is available for the vast majority of operating system platforms, -including most Unix-like platforms. The following situation is as of -February 1999 and Perl 5.005_03. - -The following platforms are able to build Perl from the standard -source code distribution available at -F<http://www.perl.com/CPAN/src/index.html> - - AIX Linux SCO ODT/OSR - A/UX MachTen Solaris - BeOS MPE/iX SunOS - BSD/OS NetBSD SVR4 - DG/UX NextSTEP Tru64 UNIX 3) - DomainOS OpenBSD Ultrix - DOS DJGPP 1) OpenSTEP UNICOS - DYNIX/ptx OS/2 VMS - FreeBSD OS390 2) VOS - HP-UX PowerMAX Windows 3.1 1) - Hurd QNX Windows 95 1) 4) - IRIX Windows 98 1) 4) - Windows NT 1) 4) - - 1) in DOS mode either the DOS or OS/2 ports can be used - 2) formerly known as MVS - 3) formerly known as Digital UNIX and before that DEC OSF/1 - 4) compilers: Borland, Cygwin32, Mingw32 EGCS/GCC, VC++ - -The following platforms have been known to build Perl from the source -but for the Perl release 5.005_03 we haven't been able to verify them, -either because the hardware/software platforms are rather rare or -because we don't have an active champion on these platforms, or both. - - 3b1 FPS Plan 9 - AmigaOS GENIX PowerUX - ConvexOS Greenhills RISC/os - CX/UX ISC Stellar - DC/OSx MachTen 68k SVR2 - DDE SMES MiNT TI1500 - DOS EMX MPC TitanOS - Dynix NEWS-OS UNICOS/mk - EP/IX Opus Unisys Dynix - ESIX Unixware - -The following platforms are planned to be supported in the standard -source code distribution of the Perl release 5.006 but are not -supported in the Perl release 5.005_03: - - BS2000 - Netware - Rhapsody - VM/ESA - -The following platforms have their own source code distributions and -binaries available via F<http://www.perl.com/CPAN/ports/index.html>. 
- - Perl release - - AS/400 5.003 - MacOS 5.004 - Netware 5.003_07 - Tandem Guardian 5.004 - -The following platforms have only binaries available via -F<http://www.perl.com/CPAN/ports/index.html>. - - Perl release - - Acorn RISCOS 5.005_02 - AOS 5.002 - LynxOS 5.004_02 +Perl is available for most operating systems, including virtually +all Unix-like platforms. See L<perlport/"Supported Platforms"> +for a listing. =head1 ENVIRONMENT @@ -322,12 +233,12 @@ See L<perlrun>. =head1 AUTHOR -Larry Wall <F<larry@wall.org>>, with the help of oodles of other folks. +Larry Wall <larry@wall.org>, with the help of oodles of other folks. -If your Perl success stories and testimonials may be of help to others -who wish to advocate the use of Perl in their applications, -or if you wish to simply express your gratitude to Larry and the -Perl developers, please write to <F<perl-thanks@perl.org>>. +If your Perl success stories and testimonials may be of help to others +who wish to advocate the use of Perl in their applications, +or if you wish to simply express your gratitude to Larry and the +Perl developers, please write to perl-thanks@perl.org . =head1 FILES @@ -336,12 +247,15 @@ Perl developers, please write to <F<perl-thanks@perl.org>>. =head1 SEE ALSO a2p awk to perl translator - s2p sed to perl translator + http://www.perl.com/ the Perl Home Page + http://www.perl.com/CPAN the Comprehensive Perl Archive + =head1 DIAGNOSTICS -The B<-w> switch produces some lovely diagnostics. +The C<use warnings> pragma (and the B<-w> switch) produces some +lovely diagnostics. See L<perldiag> for explanations of all Perl's diagnostics. The C<use diagnostics> pragma automatically turns Perl's normally terse warnings @@ -349,7 +263,7 @@ and errors into these longer forms. Compilation errors will tell you the line number of the error, with an indication of the next token or token type that was to be examined. 
-(In the case of a script passed to Perl via B<-e> switches, each +(In a script passed to Perl via B<-e> switches, each B<-e> is counted as one line.) Setuid scripts have additional constraints that can produce error @@ -378,10 +292,10 @@ so they are limited to a maximum of 65535 (higher numbers usually being affected by wraparound). You may mail your bug reports (be sure to include full configuration -information as output by the myconfig program in the perl source tree, -or by C<perl -V>) to <F<perlbug@perl.com>>. -If you've succeeded in compiling perl, the perlbug script in the utils/ -subdirectory can be used to help mail in a bug report. +information as output by the myconfig program in the perl source +tree, or by C<perl -V>) to perlbug@perl.com . If you've succeeded +in compiling perl, the B<perlbug> script in the F<utils/> subdirectory +can be used to help mail in a bug report. Perl actually stands for Pathologically Eclectic Rubbish Lister, but don't tell anyone I said that. diff --git a/contrib/perl5/pod/perl5004delta.pod b/contrib/perl5/pod/perl5004delta.pod index 323830b..85a8f96 100644 --- a/contrib/perl5/pod/perl5004delta.pod +++ b/contrib/perl5/pod/perl5004delta.pod @@ -79,7 +79,7 @@ your scripts. Before Perl 5.004, C<AUTOLOAD> functions were looked up as methods (using the C<@ISA> hierarchy), even when the function to be autoloaded was called as a plain function (e.g. C<Foo::bar()>), not a method -(e.g. C<Foo-E<gt>bar()> or C<$obj-E<gt>bar()>). +(e.g. C<< Foo->bar() >> or C<< $obj->bar() >>). Perl 5.005 will use method lookup only for methods' C<AUTOLOAD>s. However, there is a significant base of existing code that may be using @@ -266,11 +266,11 @@ A subroutine reference may now be suffixed with an arrow and a (possibly empty) parameter list. This syntax denotes a call of the referenced subroutine, with the given parameters (if any). 
-This new syntax follows the pattern of S<C<$hashref-E<gt>{FOO}>> and -S<C<$aryref-E<gt>[$foo]>>: You may now write S<C<&$subref($foo)>> as -S<C<$subref-E<gt>($foo)>>. All of these arrow terms may be chained; -thus, S<C<&{$table-E<gt>{FOO}}($bar)>> may now be written -S<C<$table-E<gt>{FOO}-E<gt>($bar)>>. +This new syntax follows the pattern of S<C<< $hashref->{FOO} >>> and +S<C<< $aryref->[$foo] >>>: You may now write S<C<&$subref($foo)>> as +S<C<< $subref->($foo) >>>. All these arrow terms may be chained; +thus, S<C<< &{$table->{FOO}}($bar) >>> may now be written +S<C<< $table->{FOO}->($bar) >>>. =back @@ -758,7 +758,7 @@ details on how to get started with building this port. There is also support for building perl under the Cygwin32 environment. Cygwin32 is a set of GNU tools that make it possible to compile and run -many UNIX programs under Windows NT by providing a mostly UNIX-like +many Unix programs under Windows NT by providing a mostly Unix-like interface for compilation and execution. See F<README.cygwin32> in the perl distribution for more details on this port and how to obtain the Cygwin32 toolkit. @@ -936,7 +936,7 @@ requested with the ":flock" tag (e.g. C<use Fcntl ':flock'>). =head2 IO -The IO module provides a simple mechanism to load all of the IO modules at one +The IO module provides a simple mechanism to load all the IO modules at one go. Currently this includes: IO::Handle @@ -1290,7 +1290,7 @@ likely to eliminate these arbitrary limitations. (F) A carriage return character was found in the input. This is an error, and not a warning, because carriage return characters can break -multi-line strings, including here documents (e.g., C<print E<lt>E<lt>EOF;>). +multi-line strings, including here documents (e.g., C<print <<EOF;>). =item Illegal switch in PERL5OPT: %s @@ -1312,7 +1312,7 @@ architecture. 
On a 32-bit architecture the largest octal literal is =item internal error: glob failed (P) Something went wrong with the external program(s) used for C<glob> -and C<E<lt>*.cE<gt>>. This may mean that your csh (C shell) is +and C<< <*.c> >>. This may mean that your csh (C shell) is broken. If so, you should change all of the csh-related variables in config.sh: If you have tcsh, make the variables refer to it as if it were csh (e.g. C<full_csh='/usr/bin/tcsh'>); otherwise, make them all diff --git a/contrib/perl5/pod/perl5005delta.pod b/contrib/perl5/pod/perl5005delta.pod new file mode 100644 index 0000000..b133c0d --- /dev/null +++ b/contrib/perl5/pod/perl5005delta.pod @@ -0,0 +1,989 @@ +=head1 NAME + +perldelta - what's new for perl5.005 + +=head1 DESCRIPTION + +This document describes differences between the 5.004 release and this one. + +=head1 About the new versioning system + +Perl is now developed on two tracks: a maintenance track that makes +small, safe updates to released production versions with emphasis on +compatibility; and a development track that pursues more aggressive +evolution. Maintenance releases (which should be considered production +quality) have subversion numbers that run from C<1> to C<49>, and +development releases (which should be considered "alpha" quality) run +from C<50> to C<99>. + +Perl 5.005 is the combined product of the new dual-track development +scheme. + +=head1 Incompatible Changes + +=head2 WARNING: This version is not binary compatible with Perl 5.004. + +Starting with Perl 5.004_50 there were many deep and far-reaching changes +to the language internals. If you have dynamically loaded extensions +that you built under perl 5.003 or 5.004, you can continue to use them +with 5.004, but you will need to rebuild and reinstall those extensions +to use them with 5.005. See F<INSTALL> for detailed instructions on how to +upgrade.
+ +=head2 Default installation structure has changed + +The new Configure defaults are designed to allow a smooth upgrade from +5.004 to 5.005, but you should read F<INSTALL> for a detailed +discussion of the changes in order to adapt them to your system. + +=head2 Perl Source Compatibility + +When none of the experimental features are enabled, there should be +very few user-visible Perl source compatibility issues. + +If threads are enabled, then some caveats apply. C<@_> and C<$_> become +lexical variables. The effect of this should be largely transparent to +the user, but there are some boundary conditions under which the user will +need to be aware of the issues. For example, C<local(@_)> results in +a "Can't localize lexical variable @_ ..." message. This may be enabled +in a future version. + +Some new keywords have been introduced. These are generally expected to +have very little impact on compatibility. See L<New C<INIT> keyword>, +L<New C<lock> keyword>, and L<New C<qr//> operator>. + +Certain barewords are now reserved. Use of these will provoke a warning +if you have asked for them with the C<-w> switch. +See L<C<our> is now a reserved word>. + +=head2 C Source Compatibility + +There have been a large number of changes in the internals to support +the new features in this release. + +=over 4 + +=item Core sources now require ANSI C compiler + +An ANSI C compiler is now B<required> to build perl. See F<INSTALL>. + +=item All Perl global variables must now be referenced with an explicit prefix + +All Perl global variables that are visible for use by extensions now +have a C<PL_> prefix. New extensions should B<not> refer to perl globals +by their unqualified names. To preserve sanity, we provide limited +backward compatibility for globals that are being widely used like +C<sv_undef> and C<na> (which should now be written as C<PL_sv_undef>, +C<PL_na> etc.)
+ +If you find that your XS extension does not compile anymore because a +perl global is not visible, try adding a C<PL_> prefix to the global +and rebuild. + +It is strongly recommended that all functions in the Perl API that don't +begin with C<perl> be referenced with a C<Perl_> prefix. The bare function +names without the C<Perl_> prefix are supported with macros, but this +support may cease in a future release. + +See L<perlguts/"API LISTING">. + +=item Enabling threads has source compatibility issues + +Perl built with threading enabled requires extensions to use the new +C<dTHR> macro to initialize the handle to access per-thread data. +If you see a compiler error that talks about the variable C<thr> not +being declared (when building a module that has XS code), you need +to add C<dTHR;> at the beginning of the block that elicited the error. + +The API function C<perl_get_sv("@",FALSE)> should be used instead of +directly accessing perl globals as C<GvSV(errgv)>. The API call is +backward compatible with existing perls and provides source compatibility +when threading is enabled. + +See L<"C Source Compatibility"> for more information. + +=back + +=head2 Binary Compatibility + +This version is NOT binary compatible with older versions. All extensions +will need to be recompiled. Further, binaries built with threads enabled +are incompatible with binaries built without. This should largely be +transparent to the user, as all binary incompatible configurations have +their own unique architecture name, and extension binaries get installed at +unique locations. This allows coexistence of several configurations in +the same directory hierarchy. See F<INSTALL>. + +=head2 Security fixes may affect compatibility + +A few taint leaks and taint omissions have been corrected. This may lead +to "failure" of scripts that used to work with older versions. Compiling +with -DINCOMPLETE_TAINTS provides a perl with minimal amounts of changes +to the tainting behavior.
But note that the resulting perl will have +known insecurities. + +Oneliners with the C<-e> switch do not create temporary files anymore. + +=head2 Relaxed new mandatory warnings introduced in 5.004 + +Many new warnings that were introduced in 5.004 have been made +optional. Some of these warnings are still present, but perl's new +features make them less often a problem. See L<New Diagnostics>. + +=head2 Licensing + +Perl has a new Social Contract for contributors. See F<Porting/Contract>. + +The license included in much of the Perl documentation has changed. +Most of the Perl documentation was previously under the implicit GNU +General Public License or the Artistic License (at the user's choice). +Now much of the documentation unambiguously states the terms under which +it may be distributed. Those terms are in general much less restrictive +than the GNU GPL. See L<perl> and the individual perl man pages listed +therein. + +=head1 Core Changes + + +=head2 Threads + +WARNING: Threading is considered an B<experimental> feature. Details of the +implementation may change without notice. There are known limitations +and some bugs. These are expected to be fixed in future versions. + +See F<README.threads>. + +=head2 Compiler + +WARNING: The Compiler and related tools are considered B<experimental>. +Features may change without notice, and there are known limitations +and bugs. Since the compiler is fully external to perl, the default +configuration will build and install it. + +The Compiler produces three different types of transformations of a +perl program. The C backend generates C code that captures perl's state +just before execution begins. It eliminates the compile-time overheads +of the regular perl interpreter, but the run-time performance remains +comparatively the same. The CC backend generates optimized C code +equivalent to the code path at run-time. 
The CC backend has greater +potential for big optimizations, but only a few optimizations are +implemented currently. The Bytecode backend generates a platform +independent bytecode representation of the interpreter's state +just before execution. Thus, the Bytecode back end also eliminates +much of the compilation overhead of the interpreter. + +The compiler comes with several valuable utilities. + +C<B::Lint> is an experimental module to detect and warn about suspicious +code, especially the cases that the C<-w> switch does not detect. + +C<B::Deparse> can be used to demystify perl code, and understand +how perl optimizes certain constructs. + +C<B::Xref> generates cross reference reports of all definition and use +of variables, subroutines and formats in a program. + +C<B::Showlex> shows the lexical variables used by a subroutine or file +at a glance. + +C<perlcc> is a simple frontend for compiling perl. + +See C<ext/B/README>, L<B>, and the respective compiler modules. + +=head2 Regular Expressions + +Perl's regular expression engine has been seriously overhauled, and +many new constructs are supported. Several bugs have been fixed. + +Here is an itemized summary: + +=over 4 + +=item Many new and improved optimizations + +Changes in the RE engine: + + Unneeded nodes removed; + Substrings merged together; + New types of nodes to process (SUBEXPR)* and similar expressions + quickly, used if the SUBEXPR has no side effects and matches + strings of the same length; + Better optimizations by lookup for constant substrings; + Better search for constant substrings anchored by $ ; + +Changes in Perl code using RE engine: + + More optimizations to s/longer/short/; + study() was not working; + /blah/ may be optimized to an analogue of index() if $& $` $' not seen; + Unneeded copying of matched-against string removed; + Only matched part of the string is copied if $` $' were not seen; + +=item Many bug fixes + +Note that only the major bug fixes are listed here.
See F<Changes> for others. + + Backtracking might not restore start of $3. + No feedback if max count for * or + on "complex" subexpression + was reached, similarly (but at compile time) for {3,34567} + Primitive restrictions on max count introduced to decrease a + possibility of a segfault; + (ZERO-LENGTH)* could segfault; + (ZERO-LENGTH)* was prohibited; + Long REs were not allowed; + /RE/g could skip matches at the same position after a + zero-length match; + +=item New regular expression constructs + +The following new syntax elements are supported: + + (?<=RE) + (?<!RE) + (?{ CODE }) + (?i-x) + (?i:RE) + (?(COND)YES_RE|NO_RE) + (?>RE) + \z + +=item New operator for precompiled regular expressions + +See L<New C<qr//> operator>. + +=item Other improvements + + Better debugging output (possibly with colors), + even from non-debugging Perl; + RE engine code now looks like C, not like assembler; + Behaviour of RE modifiable by `use re' directive; + Improved documentation; + Test suite significantly extended; + Syntax [:^upper:] etc., reserved inside character classes; + +=item Incompatible changes + + (?i) localized inside enclosing group; + $( is not interpolated into RE any more; + /RE/g may match at the same position (with non-zero length) + after a zero-length match (bug fix). + +=back + +See L<perlre> and L<perlop>. + +=head2 Improved malloc() + +See banner at the beginning of C<malloc.c> for details. + +=head2 Quicksort is internally implemented + +Perl now contains its own highly optimized qsort() routine. The new qsort() +is resistant to inconsistent comparison functions, so Perl's C<sort()> will +not provoke coredumps any more when given poorly written sort subroutines. +(Some C library C<qsort()>s that were being used before used to have this +problem.) In our testing, the new C<qsort()> required the minimal number +of pair-wise compares on average, among all known C<qsort()> implementations. + +See L<perlfunc/sort>.
+ +=head2 Reliable signals + +Perl's signal handling is susceptible to random crashes, because signals +arrive asynchronously, and the Perl runtime is not reentrant at arbitrary +times. + +However, one experimental implementation of reliable signals is available +when threads are enabled. See C<Thread::Signal>. Also see F<INSTALL> for +how to build a Perl capable of threads. + +=head2 Reliable stack pointers + +The internals now reallocate the perl stack only at predictable times. +In particular, magic calls never trigger reallocations of the stack, +because all reentrancy of the runtime is handled using a "stack of stacks". +This should improve reliability of cached stack pointers in the internals +and in XSUBs. + +=head2 More generous treatment of carriage returns + +Perl used to complain if it encountered literal carriage returns in +scripts. Now they are mostly treated like whitespace within program text. +Inside string literals and here documents, literal carriage returns are +ignored if they occur paired with linefeeds, or get interpreted as whitespace +if they stand alone. This behavior means that literal carriage returns +in files should be avoided. You can get the older, more compatible (but +less generous) behavior by defining the preprocessor symbol +C<PERL_STRICT_CR> when building perl. Of course, all this has nothing +whatever to do with how escapes like C<\r> are handled within strings. + +Note that this doesn't somehow magically allow you to keep all text files +in DOS format. The generous treatment only applies to files that perl +itself parses. If your C compiler doesn't allow carriage returns in +files, you may still be unable to build modules that need a C compiler. + +=head2 Memory leaks + +C<substr>, C<pos> and C<vec> don't leak memory anymore when used in lvalue +context. Many small leaks that impacted applications that embed multiple +interpreters have been fixed. 
+ +=head2 Better support for multiple interpreters + +The build-time option C<-DMULTIPLICITY> has had many of the details +reworked. Some previously global variables that should have been +per-interpreter now are. With care, this allows interpreters to call +each other. See the C<PerlInterp> extension on CPAN. + +=head2 Behavior of local() on array and hash elements is now well-defined + +See L<perlsub/"Temporary Values via local()">. + +=head2 C<%!> is transparently tied to the L<Errno> module + +See L<perlvar>, and L<Errno>. + +=head2 Pseudo-hashes are supported + +See L<perlref>. + +=head2 C<EXPR foreach EXPR> is supported + +See L<perlsyn>. + +=head2 Keywords can be globally overridden + +See L<perlsub>. + +=head2 C<$^E> is meaningful on Win32 + +See L<perlvar>. + +=head2 C<foreach (1..1000000)> optimized + +C<foreach (1..1000000)> is now optimized into a counting loop. It does +not try to allocate a 1000000-size list anymore. + +=head2 C<Foo::> can be used as implicitly quoted package name + +Barewords caused unintuitive behavior when a subroutine with the same +name as a package happened to be defined. Thus, C<new Foo @args> +used the result of the call to C<Foo()> instead of C<Foo> being treated +as a literal. The recommended way to write barewords in the indirect +object slot is C<new Foo:: @args>. Note that the method C<new()> is +called with a first argument of C<Foo>, not C<Foo::> when you do that. + +=head2 C<exists $Foo::{Bar::}> tests existence of a package + +It was impossible to test for the existence of a package without +actually creating it before. Now C<exists $Foo::{Bar::}> can be +used to test if the C<Foo::Bar> namespace has been created. + +=head2 Better locale support + +See L<perllocale>. + +=head2 Experimental support for 64-bit platforms + +Perl5 has always had 64-bit support on systems with 64-bit longs.
+Starting with 5.005, the beginnings of experimental support for systems +with 32-bit long and 64-bit 'long long' integers has been added. +If you add -DUSE_LONG_LONG to your ccflags in config.sh (or manually +define it in perl.h) then perl will be built with 'long long' support. +There will be many compiler warnings, and the resultant perl may not +work on all systems. There are many other issues related to +third-party extensions and libraries. This option exists to allow +people to work on those issues. + +=head2 prototype() returns useful results on builtins + +See L<perlfunc/prototype>. + +=head2 Extended support for exception handling + +C<die()> now accepts a reference value, and C<$@> gets set to that +value in exception traps. This makes it possible to propagate +exception objects. This is an undocumented B<experimental> feature. + +=head2 Re-blessing in DESTROY() supported for chaining DESTROY() methods + +See L<perlobj/Destructors>. + +=head2 All C<printf> format conversions are handled internally + +See L<perlfunc/printf>. + +=head2 New C<INIT> keyword + +C<INIT> subs are like C<BEGIN> and C<END>, but they get run just before +the perl runtime begins execution. e.g., the Perl Compiler makes use of +C<INIT> blocks to initialize and resolve pointers to XSUBs. + +=head2 New C<lock> keyword + +The C<lock> keyword is the fundamental synchronization primitive +in threaded perl. When threads are not enabled, it is currently a noop. + +To minimize impact on source compatibility this keyword is "weak", i.e., any +user-defined subroutine of the same name overrides it, unless a C<use Thread> +has been seen. + +=head2 New C<qr//> operator + +The C<qr//> operator, which is syntactically similar to the other quote-like +operators, is used to create precompiled regular expressions. This compiled +form can now be explicitly passed around in variables, and interpolated in +other regular expressions. See L<perlop>. 
+ +=head2 C<our> is now a reserved word + +Calling a subroutine with the name C<our> will now provoke a warning when +using the C<-w> switch. + +=head2 Tied arrays are now fully supported + +See L<Tie::Array>. + +=head2 Tied handles support is better + +Several missing hooks have been added. There is also a new base class for +TIEARRAY implementations. See L<Tie::Array>. + +=head2 4th argument to substr + +substr() can now both return and replace in one operation. The optional +4th argument is the replacement string. See L<perlfunc/substr>. + +=head2 Negative LENGTH argument to splice + +splice() with a negative LENGTH argument now works similarly to what the +LENGTH did for substr(). Previously a negative LENGTH was treated as +0. See L<perlfunc/splice>. + +=head2 Magic lvalues are now more magical + +When you say something like C<substr($x, 5) = "hi">, the scalar returned +by substr() is special, in that any modifications to it affect $x. +(This is called a 'magic lvalue' because an 'lvalue' is something on +the left side of an assignment.) Normally, this is exactly what you +would expect to happen, but Perl uses the same magic if you use substr(), +pos(), or vec() in a context where they might be modified, like taking +a reference with C<\> or as an argument to a sub that modifies C<@_>. +In previous versions, this 'magic' only went one way, but now changes +to the scalar the magic refers to ($x in the above example) affect the +magic lvalue too. For instance, this code now acts differently: + + $x = "hello"; + sub printit { + $x = "g'bye"; + print $_[0], "\n"; + } + printit(substr($x, 0, 5)); + +In previous versions, this would print "hello", but it now prints "g'bye". + +=head2 <> now reads in records + +If C<$/> is a reference to an integer, or a scalar that holds an integer, +<> will read in records instead of lines. For more info, see +L<perlvar/$/>. + +=head1 Supported Platforms + +Configure has many incremental improvements.
Site-wide policy for building +perl can now be made persistent, via Policy.sh. Configure also records +the command-line arguments used in F<config.sh>. + +=head2 New Platforms + +BeOS is now supported. See F<README.beos>. + +DOS is now supported under the DJGPP tools. See F<README.dos> (installed +as L<perldos> on some systems). + +MiNT is now supported. See F<README.mint>. + +MPE/iX is now supported. See F<README.mpeix>. + +MVS (aka OS390, aka Open Edition) is now supported. See F<README.os390> +(installed as L<perlos390> on some systems). + +Stratus VOS is now supported. See F<README.vos>. + +=head2 Changes in existing support + +Win32 support has been vastly enhanced. Support for Perl Object, a C++ +encapsulation of Perl. GCC and EGCS are now supported on Win32. +See F<README.win32>, aka L<perlwin32>. + +VMS configuration system has been rewritten. See F<README.vms> (installed +as L<README_vms> on some systems). + +The hints files for most Unix platforms have seen incremental improvements. + +=head1 Modules and Pragmata + +=head2 New Modules + +=over + +=item B + +Perl compiler and tools. See L<B>. + +=item Data::Dumper + +A module to pretty print Perl data. See L<Data::Dumper>. + +=item Dumpvalue + +A module to dump perl values to the screen. See L<Dumpvalue>. + +=item Errno + +A module to look up errors more conveniently. See L<Errno>. + +=item File::Spec + +A portable API for file operations. + +=item ExtUtils::Installed + +Query and manage installed modules. + +=item ExtUtils::Packlist + +Manipulate .packlist files. + +=item Fatal + +Make functions/builtins succeed or die. + +=item IPC::SysV + +Constants and other support infrastructure for System V IPC operations +in perl. + +=item Test + +A framework for writing testsuites. + +=item Tie::Array + +Base class for tied arrays. + +=item Tie::Handle + +Base class for tied handles. + +=item Thread + +Perl thread creation, manipulation, and support. + +=item attrs + +Set subroutine attributes. 
+ +=item fields + +Compile-time class fields. + +=item re + +Various pragmata to control behavior of regular expressions. + +=back + +=head2 Changes in existing modules + +=over + +=item Benchmark + +You can now run tests for I<x> seconds instead of guessing the right +number of tests to run. + +=item Carp + +Carp has a new function cluck(). cluck() warns, like carp(), but also adds +a stack backtrace to the error message, like confess(). + +=item CGI + +CGI has been updated to version 2.42. + +=item Fcntl + +More Fcntl constants added: F_SETLK64, F_SETLKW64, O_LARGEFILE for +large (more than 4G) file access (the 64-bit support is not yet +working, though, so no need to get overly excited), Free/Net/OpenBSD +locking behaviour flags F_FLOCK, F_POSIX, Linux F_SHLCK, and +O_ACCMODE: the mask of O_RDONLY, O_WRONLY, and O_RDWR. + +=item Math::Complex + +The accessor methods Re, Im, arg, abs, rho, and theta +($z->Re()) can now also act as mutators ($z->Re(3)). + +=item Math::Trig + +A little bit of radial trigonometry (cylindrical and spherical) added, +for example the great circle distance. + +=item POSIX + +POSIX now has its own platform-specific hints files. + +=item DB_File + +DB_File supports version 2.x of Berkeley DB. See C<ext/DB_File/Changes>. + +=item MakeMaker + +MakeMaker now supports writing empty makefiles and provides a way to +specify that site umask() policy should be honored. There is also +better support for manipulation of .packlist files, and getting +information about installed modules. + +Extensions that have both architecture-dependent and +architecture-independent files are now always installed completely in +the architecture-dependent locations. Previously, the shareable parts +were shared both across architectures and across perl versions and were +therefore liable to be overwritten with newer versions that might have +subtle incompatibilities. + +=item CPAN + +See L<perlmodinstall> and L<CPAN>.
+ +=item Cwd + +Cwd::cwd is faster on most platforms. + +=item Benchmark + +Keeps better time. + +=back + +=head1 Utility Changes + +C<h2ph> and related utilities have been vastly overhauled. + +C<perlcc>, a new experimental front end for the compiler, is available. + +The crude GNU C<configure> emulator is now called C<configure.gnu> to +avoid trampling on C<Configure> under case-insensitive filesystems. + +C<perldoc> used to be rather slow. The slower features are now optional. +In particular, case-insensitive searches need the C<-i> switch, and +recursive searches need C<-r>. You can set these switches in the +C<PERLDOC> environment variable to get the old behavior. + +=head1 Documentation Changes + +Config.pm now has a glossary of variables. + +F<Porting/patching.pod> has detailed instructions on how to create and +submit patches for perl. + +L<perlport> specifies guidelines on how to write portably. + +L<perlmodinstall> describes how to fetch and install modules from C<CPAN> +sites. + +Some more Perl traps are documented now. See L<perltrap>. + +L<perlopentut> gives a tutorial on using open(). + +L<perlreftut> gives a tutorial on references. + +L<perlthrtut> gives a tutorial on threads. + +=head1 New Diagnostics + +=over + +=item Ambiguous call resolved as CORE::%s(), qualify as such or use & + +(W) A subroutine you have declared has the same name as a Perl keyword, +and you have used the name without qualification for calling one or the +other. Perl decided to call the builtin because the subroutine is +not imported. + +To force interpretation as a subroutine call, either put an ampersand +before the subroutine name, or qualify the name with its package. +Alternatively, you can import the subroutine (or pretend that it's +imported with the C<use subs> pragma). + +To silently interpret it as the Perl operator, use the C<CORE::> prefix +on the operator (e.g. C<CORE::log($x)>) or declare the subroutine +to be an object method (see L<attrs>).
+ +=item Bad index while coercing array into hash + +(F) The index looked up in the hash found as the 0'th element of a +pseudo-hash is not legal. Index values must be at 1 or greater. +See L<perlref>. + +=item Bareword "%s" refers to nonexistent package + +(W) You used a qualified bareword of the form C<Foo::>, but +the compiler saw no other uses of that namespace before that point. +Perhaps you need to predeclare a package? + +=item Can't call method "%s" on an undefined value + +(F) You used the syntax of a method call, but the slot filled by the +object reference or package name contains an undefined value. +Something like this will reproduce the error: + + $BADREF = 42; + process $BADREF 1,2,3; + $BADREF->process(1,2,3); + +=item Can't check filesystem of script "%s" for nosuid + +(P) For some reason you can't check the filesystem of the script for nosuid. + +=item Can't coerce array into hash + +(F) You used an array where a hash was expected, but the array has no +information on how to map from keys to array indices. You can do that +only with arrays that have a hash reference at index 0. + +=item Can't goto subroutine from an eval-string + +(F) The "goto subroutine" call can't be used to jump out of an eval "string". +(You can use it to jump out of an eval {BLOCK}, but you probably don't want to.) + +=item Can't localize pseudo-hash element + +(F) You said something like C<< local $ar->{'key'} >>, where $ar is +a reference to a pseudo-hash. That hasn't been implemented yet, but +you can get a similar effect by localizing the corresponding array +element directly -- C<< local $ar->[$ar->[0]{'key'}] >>. + +=item Can't use %%! because Errno.pm is not available + +(F) The first time the %! hash is used, perl automatically loads the +Errno.pm module. The Errno module is expected to tie the %! hash to +provide symbolic names for C<$!> errno values. 
+ +=item Cannot find an opnumber for "%s" + +(F) A string of a form C<CORE::word> was given to prototype(), but +there is no builtin with the name C<word>. + +=item Character class syntax [. .] is reserved for future extensions + +(W) Within regular expression character classes ([]) the syntax beginning +with "[." and ending with ".]" is reserved for future extensions. +If you need to represent those character sequences inside a regular +expression character class, just quote the square brackets with the +backslash: "\[." and ".\]". + +=item Character class syntax [: :] is reserved for future extensions + +(W) Within regular expression character classes ([]) the syntax beginning +with "[:" and ending with ":]" is reserved for future extensions. +If you need to represent those character sequences inside a regular +expression character class, just quote the square brackets with the +backslash: "\[:" and ":\]". + +=item Character class syntax [= =] is reserved for future extensions + +(W) Within regular expression character classes ([]) the syntax +beginning with "[=" and ending with "=]" is reserved for future extensions. +If you need to represent those character sequences inside a regular +expression character class, just quote the square brackets with the +backslash: "\[=" and "=\]". + +=item %s: Eval-group in insecure regular expression + +(F) Perl detected tainted data when trying to compile a regular expression +that contains the C<(?{ ... })> zero-width assertion, which is unsafe. +See L<perlre/(?{ code })>, and L<perlsec>. + +=item %s: Eval-group not allowed, use re 'eval' + +(F) A regular expression contained the C<(?{ ... })> zero-width assertion, +but that construct is only allowed when the C<use re 'eval'> pragma is +in effect. See L<perlre/(?{ code })>. + +=item %s: Eval-group not allowed at run time + +(F) Perl tried to compile a regular expression containing the C<(?{ ... 
})> +zero-width assertion at run time, as it would when the pattern contains +interpolated values. Since that is a security risk, it is not allowed. +If you insist, you may still do this by explicitly building the pattern +from an interpolated string at run time and using that in an eval(). +See L<perlre/(?{ code })>. + +=item Explicit blessing to '' (assuming package main) + +(W) You are blessing a reference to a zero length string. This has +the effect of blessing the reference into the package main. This is +usually not what you want. Consider providing a default target +package, e.g. bless($ref, $p || 'MyPackage'); + +=item Illegal hex digit ignored + +(W) You may have tried to use a character other than 0 - 9 or A - F in a +hexadecimal number. Interpretation of the hexadecimal number stopped +before the illegal character. + +=item No such array field + +(F) You tried to access an array as a hash, but the field name used is +not defined. The hash at index 0 should map all valid field names to +array indices for that to work. + +=item No such field "%s" in variable %s of type %s + +(F) You tried to access a field of a typed variable where the type +does not know about the field name. The field names are looked up in +the %FIELDS hash in the type package at compile time. The %FIELDS hash +is usually set up with the 'fields' pragma. + +=item Out of memory during ridiculously large request + +(F) You can't allocate more than 2^31+"small amount" bytes. This error +is most likely to be caused by a typo in the Perl program. e.g., C<$arr[time]> +instead of C<$arr[$time]>. + +=item Range iterator outside integer range + +(F) One (or both) of the numeric arguments to the range operator ".." +are outside the range which can be represented by integers internally. +One possible workaround is to force Perl to use magical string +increment by prepending "0" to your numbers. 
+ +=item Recursive inheritance detected while looking for method '%s' in package '%s' + +(F) More than 100 levels of inheritance were encountered while invoking a +method. Probably indicates an unintended loop in your inheritance hierarchy. + +=item Reference found where even-sized list expected + +(W) You gave a single reference where Perl was expecting a list with +an even number of elements (for assignment to a hash). This +usually means that you used the anon hash constructor when you meant +to use parens. In any case, a hash requires key/value B<pairs>. + + %hash = { one => 1, two => 2, }; # WRONG + %hash = [ qw/ an anon array / ]; # WRONG + %hash = ( one => 1, two => 2, ); # right + %hash = qw( one 1 two 2 ); # also fine + +=item Undefined value assigned to typeglob + +(W) An undefined value was assigned to a typeglob, a la C<*foo = undef>. +This does nothing. It's possible that you really mean C<undef *foo>. + +=item Use of reserved word "%s" is deprecated + +(D) The indicated bareword is a reserved word. Future versions of perl +may use it as a keyword, so you're better off either explicitly quoting +the word in a manner appropriate for its context of use, or using a +different name altogether. The warning can be suppressed for subroutine +names by either adding a C<&> prefix, or using a package qualifier, +e.g. C<&our()>, or C<Foo::our()>. + +=item perl: warning: Setting locale failed. + +(S) The whole warning message will look something like: + + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LC_ALL = "En_US", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + +Exactly what were the failed locale settings varies. In the above the +settings were that the LC_ALL was "En_US" and the LANG had no value. 
+This error means that Perl detected that you and/or your system +administrator have set up the so-called locale system but Perl could +not use those settings. This was not dead serious, fortunately: there +is a "default locale" called "C" that Perl can and will use, so the +script will be run. Before you really fix the problem, however, you +will get the same error message each time you run Perl. How to really +fix the problem can be found in L<perllocale/"LOCALE PROBLEMS">. + +=back + + +=head1 Obsolete Diagnostics + +=over + +=item Can't mktemp() + +(F) The mktemp() routine failed for some reason while trying to process +a B<-e> switch. Maybe your /tmp partition is full, or clobbered. + +Removed because B<-e> doesn't use temporary files any more. + +=item Can't write to temp file for B<-e>: %s + +(F) The write routine failed for some reason while trying to process +a B<-e> switch. Maybe your /tmp partition is full, or clobbered. + +Removed because B<-e> doesn't use temporary files any more. + +=item Cannot open temporary file + +(F) The create routine failed for some reason while trying to process +a B<-e> switch. Maybe your /tmp partition is full, or clobbered. + +Removed because B<-e> doesn't use temporary files any more. + +=item regexp too big + +(F) The current implementation of regular expressions uses shorts as +address offsets within a string. Unfortunately this means that if +the regular expression compiles to longer than 32767, it'll blow up. +Usually when you want a regular expression this big, there is a better +way to do it with multiple statements. See L<perlre>. + +=back + +=head1 Configuration Changes + +You can use "Configure -Uinstallusrbinperl" which causes installperl +to skip installing perl also as /usr/bin/perl. This is useful if you +prefer not to modify /usr/bin for some reason or another but harmful +because many scripts expect to find Perl at /usr/bin/perl.
+ +=head1 BUGS + +If you find what you think is a bug, you might check the headers of +recently posted articles in the comp.lang.perl.misc newsgroup. +There may also be information at http://www.perl.com/perl/, the Perl +Home Page. + +If you believe you have an unreported bug, please run the B<perlbug> +program included with your release. Make sure you trim your bug down +to a tiny but sufficient test case. Your bug report, along with the +output of C<perl -V>, will be sent off to <F<perlbug@perl.com>> to be +analysed by the Perl porting team. + +=head1 SEE ALSO + +The F<Changes> file for exhaustive details on what changed. + +The F<INSTALL> file for how to build Perl. + +The F<README> file for general stuff. + +The F<Artistic> and F<Copying> files for copyright information. + +=head1 HISTORY + +Written by Gurusamy Sarathy <F<gsar@activestate.com>>, with many contributions +from The Perl Porters. + +Send omissions or corrections to <F<perlbug@perl.com>>. + +=cut diff --git a/contrib/perl5/pod/perlapi.pod b/contrib/perl5/pod/perlapi.pod new file mode 100644 index 0000000..e0ae4cf --- /dev/null +++ b/contrib/perl5/pod/perlapi.pod @@ -0,0 +1,2232 @@ +=head1 NAME + +perlapi - autogenerated documentation for the perl public API + +=head1 DESCRIPTION + +This file contains the documentation of the perl public API generated by +embed.pl, specifically a listing of functions, macros, flags, and variables +that may be used by extension writers. The interfaces of any functions that +are not listed here are subject to change without notice. For this reason, +blindly using functions listed in proto.h is to be avoided when writing +extensions. + +Note that all Perl API global variables must be referenced with the C<PL_> +prefix. Some macros are provided for compatibility with the older, +unadorned names, but this support may be disabled in a future release. + +The listing is alphabetical, case insensitive. + +=over 8 + +=item AvFILL + +Same as C<av_len()>. 
Deprecated, use C<av_len()> instead. + + int AvFILL(AV* av) + +=item av_clear + +Clears an array, making it empty. Does not free the memory used by the +array itself. + + void av_clear(AV* ar) + +=item av_extend + +Pre-extend an array. The C<key> is the index to which the array should be +extended. + + void av_extend(AV* ar, I32 key) + +=item av_fetch + +Returns the SV at the specified index in the array. The C<key> is the +index. If C<lval> is set then the fetch will be part of a store. Check +that the return value is non-null before dereferencing it to a C<SV*>. + +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for +more information on how to use this function on tied arrays. + + SV** av_fetch(AV* ar, I32 key, I32 lval) + +=item av_len + +Returns the highest index in the array. Returns -1 if the array is +empty. + + I32 av_len(AV* ar) + +=item av_make + +Creates a new AV and populates it with a list of SVs. The SVs are copied +into the array, so they may be freed after the call to av_make. The new AV +will have a reference count of 1. + + AV* av_make(I32 size, SV** svp) + +=item av_pop + +Pops an SV off the end of the array. Returns C<&PL_sv_undef> if the array +is empty. + + SV* av_pop(AV* ar) + +=item av_push + +Pushes an SV onto the end of the array. The array will grow automatically +to accommodate the addition. + + void av_push(AV* ar, SV* val) + +=item av_shift + +Shifts an SV off the beginning of the array. + + SV* av_shift(AV* ar) + +=item av_store + +Stores an SV in an array. The array index is specified as C<key>. The +return value will be NULL if the operation failed or if the value did not +need to be actually stored within the array (as in the case of tied +arrays). Otherwise it can be dereferenced to get the original C<SV*>. Note +that the caller is responsible for suitably incrementing the reference +count of C<val> before the call, and decrementing it if the function +returned NULL. 
+ +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for +more information on how to use this function on tied arrays. + + SV** av_store(AV* ar, I32 key, SV* val) + +=item av_undef + +Undefines the array. Frees the memory used by the array itself. + + void av_undef(AV* ar) + +=item av_unshift + +Unshift the given number of C<undef> values onto the beginning of the +array. The array will grow automatically to accommodate the addition. You +must then use C<av_store> to assign values to these new elements. + + void av_unshift(AV* ar, I32 num) + +=item call_argv + +Performs a callback to the specified Perl sub. See L<perlcall>. + +NOTE: the perl_ form of this function is deprecated. + + I32 call_argv(const char* sub_name, I32 flags, char** argv) + +=item call_method + +Performs a callback to the specified Perl method. The blessed object must +be on the stack. See L<perlcall>. + +NOTE: the perl_ form of this function is deprecated. + + I32 call_method(const char* methname, I32 flags) + +=item call_pv + +Performs a callback to the specified Perl sub. See L<perlcall>. + +NOTE: the perl_ form of this function is deprecated. + + I32 call_pv(const char* sub_name, I32 flags) + +=item call_sv + +Performs a callback to the Perl sub whose name is in the SV. See +L<perlcall>. + +NOTE: the perl_ form of this function is deprecated. + + I32 call_sv(SV* sv, I32 flags) + +=item CLASS + +Variable which is setup by C<xsubpp> to indicate the +class name for a C++ XS constructor. This is always a C<char*>. See C<THIS>. + + char* CLASS + +=item Copy + +The XSUB-writer's interface to the C C<memcpy> function. The C<src> is the +source, C<dest> is the destination, C<nitems> is the number of items, and C<type> is +the type. May fail on overlapping copies. See also C<Move>. + + void Copy(void* src, void* dest, int nitems, type) + +=item croak + +This is the XSUB-writer's interface to Perl's C<die> function. Use this +function the same way you use the C C<printf> function. 
See +C<warn>. + + void croak(const char* pat, ...) + +=item CvSTASH + +Returns the stash of the CV. + + HV* CvSTASH(CV* cv) + +=item dMARK + +Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and +C<dORIGMARK>. + + dMARK; + +=item dORIGMARK + +Saves the original stack mark for the XSUB. See C<ORIGMARK>. + + dORIGMARK; + +=item dSP + +Declares a local copy of perl's stack pointer for the XSUB, available via +the C<SP> macro. See C<SP>. + + dSP; + +=item dXSARGS + +Sets up stack and mark pointers for an XSUB, calling dSP and dMARK. This +is usually handled automatically by C<xsubpp>. Declares the C<items> +variable to indicate the number of items on the stack. + + dXSARGS; + +=item dXSI32 + +Sets up the C<ix> variable for an XSUB which has aliases. This is usually +handled automatically by C<xsubpp>. + + dXSI32; + +=item ENTER + +Opening bracket on a callback. See C<LEAVE> and L<perlcall>. + + ENTER; + +=item eval_pv + +Tells Perl to C<eval> the given string and return an SV* result. + +NOTE: the perl_ form of this function is deprecated. + + SV* eval_pv(const char* p, I32 croak_on_error) + +=item eval_sv + +Tells Perl to C<eval> the string in the SV. + +NOTE: the perl_ form of this function is deprecated. + + I32 eval_sv(SV* sv, I32 flags) + +=item EXTEND + +Used to extend the argument stack for an XSUB's return values. Once +used, guarantees that there is room for at least C<nitems> to be pushed +onto the stack. + + void EXTEND(SP, int nitems) + +=item fbm_compile + +Analyses the string in order to make fast searches on it using fbm_instr() +-- the Boyer-Moore algorithm. + + void fbm_compile(SV* sv, U32 flags) + +=item fbm_instr + +Returns the location of the SV in the string delimited by C<big> and +C<bigend>. It returns C<Nullch> if the string can't be found. The C<littlesv> +does not have to be fbm_compiled, but the search will not be as fast +then.
+ + char* fbm_instr(unsigned char* big, unsigned char* bigend, SV* littlesv, U32 flags) + +=item FREETMPS + +Closing bracket for temporaries on a callback. See C<SAVETMPS> and +L<perlcall>. + + FREETMPS; + +=item get_av + +Returns the AV of the specified Perl array. If C<create> is set and the +Perl variable does not exist then it will be created. If C<create> is not +set and the variable does not exist then NULL is returned. + +NOTE: the perl_ form of this function is deprecated. + + AV* get_av(const char* name, I32 create) + +=item get_cv + +Returns the CV of the specified Perl subroutine. If C<create> is set and +the Perl subroutine does not exist then it will be declared (which has the +same effect as saying C<sub name;>). If C<create> is not set and the +subroutine does not exist then NULL is returned. + +NOTE: the perl_ form of this function is deprecated. + + CV* get_cv(const char* name, I32 create) + +=item get_hv + +Returns the HV of the specified Perl hash. If C<create> is set and the +Perl variable does not exist then it will be created. If C<create> is not +set and the variable does not exist then NULL is returned. + +NOTE: the perl_ form of this function is deprecated. + + HV* get_hv(const char* name, I32 create) + +=item get_sv + +Returns the SV of the specified Perl scalar. If C<create> is set and the +Perl variable does not exist then it will be created. If C<create> is not +set and the variable does not exist then NULL is returned. + +NOTE: the perl_ form of this function is deprecated. + + SV* get_sv(const char* name, I32 create) + +=item GIMME + +A backward-compatible version of C<GIMME_V> which can only return +C<G_SCALAR> or C<G_ARRAY>; in a void context, it returns C<G_SCALAR>. +Deprecated. Use C<GIMME_V> instead. + + U32 GIMME + +=item GIMME_V + +The XSUB-writer's equivalent to Perl's C<wantarray>. Returns C<G_VOID>, +C<G_SCALAR> or C<G_ARRAY> for void, scalar or array context, +respectively. 
+ + U32 GIMME_V + +=item GvSV + +Return the SV from the GV. + + SV* GvSV(GV* gv) + +=item gv_fetchmeth + +Returns the glob with the given C<name> and a defined subroutine or +C<NULL>. The glob lives in the given C<stash>, or in the stashes +accessible via @ISA and @UNIVERSAL. + +The argument C<level> should be either 0 or -1. If C<level==0>, as a +side-effect creates a glob with the given C<name> in the given C<stash> +which in the case of success contains an alias for the subroutine, and sets +up caching info for this glob. Similarly for all the searched stashes. + +This function grants C<"SUPER"> token as a postfix of the stash name. The +GV returned from C<gv_fetchmeth> may be a method cache entry, which is not +visible to Perl code. So when calling C<call_sv>, you should not use +the GV directly; instead, you should use the method's CV, which can be +obtained from the GV with the C<GvCV> macro. + + GV* gv_fetchmeth(HV* stash, const char* name, STRLEN len, I32 level) + +=item gv_fetchmethod + +See L<gv_fetchmethod_autoload>. + + GV* gv_fetchmethod(HV* stash, const char* name) + +=item gv_fetchmethod_autoload + +Returns the glob which contains the subroutine to call to invoke the method +on the C<stash>. In fact in the presence of autoloading this may be the +glob for "AUTOLOAD". In this case the corresponding variable $AUTOLOAD is +already setup. + +The third parameter of C<gv_fetchmethod_autoload> determines whether +AUTOLOAD lookup is performed if the given method is not present: non-zero +means yes, look for AUTOLOAD; zero means no, don't look for AUTOLOAD. +Calling C<gv_fetchmethod> is equivalent to calling C<gv_fetchmethod_autoload> +with a non-zero C<autoload> parameter. + +These functions grant C<"SUPER"> token as a prefix of the method name. Note +that if you want to keep the returned glob for a long time, you need to +check for it being "AUTOLOAD", since at the later time the call may load a +different subroutine due to $AUTOLOAD changing its value. 
Use the glob +created via a side effect to do this. + +These functions have the same side-effects as C<gv_fetchmeth> with +C<level==0>. C<name> should be writable if it contains C<':'> or +C<'\''>. The warning against passing the GV returned by C<gv_fetchmeth> to +C<call_sv> applies equally to these functions. + + GV* gv_fetchmethod_autoload(HV* stash, const char* name, I32 autoload) + +=item gv_stashpv + +Returns a pointer to the stash for a specified package. C<name> should +be a valid UTF-8 string. If C<create> is set then the package will be +created if it does not already exist. If C<create> is not set and the +package does not exist then NULL is returned. + + HV* gv_stashpv(const char* name, I32 create) + +=item gv_stashsv + +Returns a pointer to the stash for a specified package, which must be a +valid UTF-8 string. See C<gv_stashpv>. + + HV* gv_stashsv(SV* sv, I32 create) + +=item G_ARRAY + +Used to indicate array context. See C<GIMME_V>, C<GIMME> and +L<perlcall>. + +=item G_DISCARD + +Indicates that arguments returned from a callback should be discarded. See +L<perlcall>. + +=item G_EVAL + +Used to force a Perl C<eval> wrapper around a callback. See +L<perlcall>. + +=item G_NOARGS + +Indicates that no arguments are being sent to a callback. See +L<perlcall>. + +=item G_SCALAR + +Used to indicate scalar context. See C<GIMME_V>, C<GIMME>, and +L<perlcall>. + +=item G_VOID + +Used to indicate void context. See C<GIMME_V> and L<perlcall>. + +=item HEf_SVKEY + +This flag, used in the length slot of hash entries and magic structures, +specifies the structure contains a C<SV*> pointer where a C<char*> pointer +is to be expected. (For information only--not to be used). + +=item HeHASH + +Returns the computed hash stored in the hash entry. + + U32 HeHASH(HE* he) + +=item HeKEY + +Returns the actual pointer stored in the key slot of the hash entry. The +pointer may be either C<char*> or C<SV*>, depending on the value of +C<HeKLEN()>. Can be assigned to.
The C<HePV()> or C<HeSVKEY()> macros are +usually preferable for finding the value of a key. + + void* HeKEY(HE* he) + +=item HeKLEN + +If this is negative, and amounts to C<HEf_SVKEY>, it indicates the entry +holds an C<SV*> key. Otherwise, holds the actual length of the key. Can +be assigned to. The C<HePV()> macro is usually preferable for finding key +lengths. + + STRLEN HeKLEN(HE* he) + +=item HePV + +Returns the key slot of the hash entry as a C<char*> value, doing any +necessary dereferencing of possibly C<SV*> keys. The length of the string +is placed in C<len> (this is a macro, so do I<not> use C<&len>). If you do +not care about what the length of the key is, you may use the global +variable C<PL_na>, though this is rather less efficient than using a local +variable. Remember though, that hash keys in perl are free to contain +embedded nulls, so using C<strlen()> or similar is not a good way to find +the length of hash keys. This is very similar to the C<SvPV()> macro +described elsewhere in this document. + + char* HePV(HE* he, STRLEN len) + +=item HeSVKEY + +Returns the key as an C<SV*>, or C<Nullsv> if the hash entry does not +contain an C<SV*> key. + + SV* HeSVKEY(HE* he) + +=item HeSVKEY_force + +Returns the key as an C<SV*>. Will create and return a temporary mortal +C<SV*> if the hash entry contains only a C<char*> key. + + SV* HeSVKEY_force(HE* he) + +=item HeSVKEY_set + +Sets the key to a given C<SV*>, taking care to set the appropriate flags to +indicate the presence of an C<SV*> key, and returns the same +C<SV*>. + + SV* HeSVKEY_set(HE* he, SV* sv) + +=item HeVAL + +Returns the value slot (type C<SV*>) stored in the hash entry. + + SV* HeVAL(HE* he) + +=item HvNAME + +Returns the package name of a stash. See C<SvSTASH>, C<CvSTASH>. + + char* HvNAME(HV* stash) + +=item hv_clear + +Clears a hash, making it empty. + + void hv_clear(HV* tb) + +=item hv_delete + +Deletes a key/value pair in the hash. 
The value SV is removed from the +hash and returned to the caller. The C<klen> is the length of the key. +The C<flags> value will normally be zero; if set to G_DISCARD then NULL +will be returned. + + SV* hv_delete(HV* tb, const char* key, U32 klen, I32 flags) + +=item hv_delete_ent + +Deletes a key/value pair in the hash. The value SV is removed from the +hash and returned to the caller. The C<flags> value will normally be zero; +if set to G_DISCARD then NULL will be returned. C<hash> can be a valid +precomputed hash value, or 0 to ask for it to be computed. + + SV* hv_delete_ent(HV* tb, SV* key, I32 flags, U32 hash) + +=item hv_exists + +Returns a boolean indicating whether the specified hash key exists. The +C<klen> is the length of the key. + + bool hv_exists(HV* tb, const char* key, U32 klen) + +=item hv_exists_ent + +Returns a boolean indicating whether the specified hash key exists. C<hash> +can be a valid precomputed hash value, or 0 to ask for it to be +computed. + + bool hv_exists_ent(HV* tb, SV* key, U32 hash) + +=item hv_fetch + +Returns the SV which corresponds to the specified key in the hash. The +C<klen> is the length of the key. If C<lval> is set then the fetch will be +part of a store. Check that the return value is non-null before +dereferencing it to a C<SV*>. + +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more +information on how to use this function on tied hashes. + + SV** hv_fetch(HV* tb, const char* key, U32 klen, I32 lval) + +=item hv_fetch_ent + +Returns the hash entry which corresponds to the specified key in the hash. +C<hash> must be a valid precomputed hash number for the given C<key>, or 0 +if you want the function to compute it. If C<lval> is set then the fetch +will be part of a store. Make sure the return value is non-null before +accessing it. The return value when C<tb> is a tied hash is a pointer to a +static location, so be sure to make a copy of the structure if you need to +store it somewhere.
+ +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more +information on how to use this function on tied hashes. + + HE* hv_fetch_ent(HV* tb, SV* key, I32 lval, U32 hash) + +=item hv_iterinit + +Prepares a starting point to traverse a hash table. Returns the number of +keys in the hash (i.e. the same as C<HvKEYS(tb)>). The return value is +currently only meaningful for hashes without tie magic. + +NOTE: Before version 5.004_65, C<hv_iterinit> used to return the number of +hash buckets that happen to be in use. If you still need that esoteric +value, you can get it through the macro C<HvFILL(tb)>. + + I32 hv_iterinit(HV* tb) + +=item hv_iterkey + +Returns the key from the current position of the hash iterator. See +C<hv_iterinit>. + + char* hv_iterkey(HE* entry, I32* retlen) + +=item hv_iterkeysv + +Returns the key as an C<SV*> from the current position of the hash +iterator. The return value will always be a mortal copy of the key. Also +see C<hv_iterinit>. + + SV* hv_iterkeysv(HE* entry) + +=item hv_iternext + +Returns entries from a hash iterator. See C<hv_iterinit>. + + HE* hv_iternext(HV* tb) + +=item hv_iternextsv + +Performs an C<hv_iternext>, C<hv_iterkey>, and C<hv_iterval> in one +operation. + + SV* hv_iternextsv(HV* hv, char** key, I32* retlen) + +=item hv_iterval + +Returns the value from the current position of the hash iterator. See +C<hv_iterkey>. + + SV* hv_iterval(HV* tb, HE* entry) + +=item hv_magic + +Adds magic to a hash. See C<sv_magic>. + + void hv_magic(HV* hv, GV* gv, int how) + +=item hv_store + +Stores an SV in a hash. The hash key is specified as C<key> and C<klen> is +the length of the key. The C<hash> parameter is the precomputed hash +value; if it is zero then Perl will compute it. The return value will be +NULL if the operation failed or if the value did not need to be actually +stored within the hash (as in the case of tied hashes). Otherwise it can +be dereferenced to get the original C<SV*>. 
Note that the caller is +responsible for suitably incrementing the reference count of C<val> before +the call, and decrementing it if the function returned NULL. + +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more +information on how to use this function on tied hashes. + + SV** hv_store(HV* tb, const char* key, U32 klen, SV* val, U32 hash) + +=item hv_store_ent + +Stores C<val> in a hash. The hash key is specified as C<key>. The C<hash> +parameter is the precomputed hash value; if it is zero then Perl will +compute it. The return value is the new hash entry so created. It will be +NULL if the operation failed or if the value did not need to be actually +stored within the hash (as in the case of tied hashes). Otherwise the +contents of the return value can be accessed using the C<He???> macros +described here. Note that the caller is responsible for suitably +incrementing the reference count of C<val> before the call, and +decrementing it if the function returned NULL. + +See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more +information on how to use this function on tied hashes. + + HE* hv_store_ent(HV* tb, SV* key, SV* val, U32 hash) + +=item hv_undef + +Undefines the hash. + + void hv_undef(HV* tb) + +=item isALNUM + +Returns a boolean indicating whether the C C<char> is an ascii alphanumeric +character or digit. + + bool isALNUM(char ch) + +=item isALPHA + +Returns a boolean indicating whether the C C<char> is an ascii alphabetic +character. + + bool isALPHA(char ch) + +=item isDIGIT + +Returns a boolean indicating whether the C C<char> is an ascii +digit. + + bool isDIGIT(char ch) + +=item isLOWER + +Returns a boolean indicating whether the C C<char> is a lowercase +character. + + bool isLOWER(char ch) + +=item isSPACE + +Returns a boolean indicating whether the C C<char> is whitespace. + + bool isSPACE(char ch) + +=item isUPPER + +Returns a boolean indicating whether the C C<char> is an uppercase +character. 
+ + bool isUPPER(char ch) + +=item items + +Variable which is set up by C<xsubpp> to indicate the number of +items on the stack. See L<perlxs/"Variable-length Parameter Lists">. + + I32 items + +=item ix + +Variable which is set up by C<xsubpp> to indicate which of an +XSUB's aliases was used to invoke it. See L<perlxs/"The ALIAS: Keyword">. + + I32 ix + +=item LEAVE + +Closing bracket on a callback. See C<ENTER> and L<perlcall>. + + LEAVE; + +=item looks_like_number + +Test if the content of an SV looks like a number (or is a +number). + + I32 looks_like_number(SV* sv) + +=item MARK + +Stack marker variable for the XSUB. See C<dMARK>. + +=item mg_clear + +Clear something magical that the SV represents. See C<sv_magic>. + + int mg_clear(SV* sv) + +=item mg_copy + +Copies the magic from one SV to another. See C<sv_magic>. + + int mg_copy(SV* sv, SV* nsv, const char* key, I32 klen) + +=item mg_find + +Finds the magic pointer for type matching the SV. See C<sv_magic>. + + MAGIC* mg_find(SV* sv, int type) + +=item mg_free + +Free any magic storage used by the SV. See C<sv_magic>. + + int mg_free(SV* sv) + +=item mg_get + +Do magic after a value is retrieved from the SV. See C<sv_magic>. + + int mg_get(SV* sv) + +=item mg_length + +Report on the SV's length. See C<sv_magic>. + + U32 mg_length(SV* sv) + +=item mg_magical + +Turns on the magical status of an SV. See C<sv_magic>. + + void mg_magical(SV* sv) + +=item mg_set + +Do magic after a value is assigned to the SV. See C<sv_magic>. + + int mg_set(SV* sv) + +=item Move + +The XSUB-writer's interface to the C C<memmove> function. The C<src> is the +source, C<dest> is the destination, C<nitems> is the number of items, and C<type> is +the type. Can do overlapping moves. See also C<Copy>. + + void Move(void* src, void* dest, int nitems, type) + +=item New + +The XSUB-writer's interface to the C C<malloc> function. + + void New(int id, void* ptr, int nitems, type) + +=item newAV + +Creates a new AV.
The reference count is set to 1. + + AV* newAV() + +=item Newc + +The XSUB-writer's interface to the C C<malloc> function, with +cast. + + void Newc(int id, void* ptr, int nitems, type, cast) + +=item newCONSTSUB + +Creates a constant sub equivalent to Perl C<sub FOO () { 123 }> which is +eligible for inlining at compile-time. + + void newCONSTSUB(HV* stash, char* name, SV* sv) + +=item newHV + +Creates a new HV. The reference count is set to 1. + + HV* newHV() + +=item newRV_inc + +Creates an RV wrapper for an SV. The reference count for the original SV is +incremented. + + SV* newRV_inc(SV* sv) + +=item newRV_noinc + +Creates an RV wrapper for an SV. The reference count for the original +SV is B<not> incremented. + + SV* newRV_noinc(SV *sv) + +=item NEWSV + +Creates a new SV. A non-zero C<len> parameter indicates the number of +bytes of preallocated string space the SV should have. An extra byte for a +trailing NUL is also reserved. (SvPOK is not set for the SV even if string +space is allocated.) The reference count for the new SV is set to 1. +C<id> is an integer id between 0 and 1299 (used to identify leaks). + + SV* NEWSV(int id, STRLEN len) + +=item newSViv + +Creates a new SV and copies an integer into it. The reference count for the +SV is set to 1. + + SV* newSViv(IV i) + +=item newSVnv + +Creates a new SV and copies a floating point value into it. +The reference count for the SV is set to 1. + + SV* newSVnv(NV n) + +=item newSVpv + +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. If C<len> is zero, Perl will compute the length using +strlen(). For efficiency, consider using C<newSVpvn> instead. + + SV* newSVpv(const char* s, STRLEN len) + +=item newSVpvf + +Creates a new SV and initializes it with the string formatted like +C<sprintf>. + + SV* newSVpvf(const char* pat, ...) + +=item newSVpvn + +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1.
Note that if C<len> is zero, Perl will create a zero length +string. You are responsible for ensuring that the source string is at least +C<len> bytes long. + + SV* newSVpvn(const char* s, STRLEN len) + +=item newSVrv + +Creates a new SV for the RV, C<rv>, to point to. If C<rv> is not an RV then +it will be upgraded to one. If C<classname> is non-null then the new SV will +be blessed in the specified package. The new SV is returned and its +reference count is 1. + + SV* newSVrv(SV* rv, const char* classname) + +=item newSVsv + +Creates a new SV which is an exact duplicate of the original SV. + + SV* newSVsv(SV* old) + +=item newSVuv + +Creates a new SV and copies an unsigned integer into it. +The reference count for the SV is set to 1. + + SV* newSVuv(UV u) + +=item newXS + +Used by C<xsubpp> to hook up XSUBs as Perl subs. + +=item newXSproto + +Used by C<xsubpp> to hook up XSUBs as Perl subs. Adds Perl prototypes to +the subs. + +=item Newz + +The XSUB-writer's interface to the C C<malloc> function. The allocated +memory is zeroed with C<memzero>. + + void Newz(int id, void* ptr, int nitems, type) + +=item Nullav + +Null AV pointer. + +=item Nullch + +Null character pointer. + +=item Nullcv + +Null CV pointer. + +=item Nullhv + +Null HV pointer. + +=item Nullsv + +Null SV pointer. + +=item ORIGMARK + +The original stack mark for the XSUB. See C<dORIGMARK>. + +=item perl_alloc + +Allocates a new Perl interpreter. See L<perlembed>. + + PerlInterpreter* perl_alloc() + +=item perl_construct + +Initializes a new Perl interpreter. See L<perlembed>. + + void perl_construct(PerlInterpreter* interp) + +=item perl_destruct + +Shuts down a Perl interpreter. See L<perlembed>. + + void perl_destruct(PerlInterpreter* interp) + +=item perl_free + +Releases a Perl interpreter. See L<perlembed>. + + void perl_free(PerlInterpreter* interp) + +=item perl_parse + +Tells a Perl interpreter to parse a Perl script. See L<perlembed>. 
+ + int perl_parse(PerlInterpreter* interp, XSINIT_t xsinit, int argc, char** argv, char** env) + +=item perl_run + +Tells a Perl interpreter to run. See L<perlembed>. + + int perl_run(PerlInterpreter* interp) + +=item PL_DBsingle + +When Perl is run in debugging mode, with the B<-d> switch, this SV is a +boolean which indicates whether subs are being single-stepped. +Single-stepping is automatically turned on after every step. This is the C +variable which corresponds to Perl's $DB::single variable. See +C<PL_DBsub>. + + SV * PL_DBsingle + +=item PL_DBsub + +When Perl is run in debugging mode, with the B<-d> switch, this GV contains +the SV which holds the name of the sub being debugged. This is the C +variable which corresponds to Perl's $DB::sub variable. See +C<PL_DBsingle>. + + GV * PL_DBsub + +=item PL_DBtrace + +Trace variable used when Perl is run in debugging mode, with the B<-d> +switch. This is the C variable which corresponds to Perl's $DB::trace +variable. See C<PL_DBsingle>. + + SV * PL_DBtrace + +=item PL_dowarn + +The C variable which corresponds to Perl's $^W warning variable. + + bool PL_dowarn + +=item PL_modglobal + +C<PL_modglobal> is a general purpose, interpreter global HV for use by +extensions that need to keep information on a per-interpreter basis. +In a pinch, it can also be used as a symbol table for extensions +to share data among each other. It is a good idea to use keys +prefixed by the package name of the extension that owns the data. + + HV* PL_modglobal + +=item PL_na + +A convenience variable which is typically used with C<SvPV> when one +doesn't care about the length of the string. It is usually more efficient +to either declare a local variable and use that instead or to use the +C<SvPV_nolen> macro. + + STRLEN PL_na + +=item PL_sv_no + +This is the C<false> SV. See C<PL_sv_yes>. Always refer to this as +C<&PL_sv_no>. + + SV PL_sv_no + +=item PL_sv_undef + +This is the C<undef> SV. Always refer to this as C<&PL_sv_undef>. 
+ + SV PL_sv_undef + +=item PL_sv_yes + +This is the C<true> SV. See C<PL_sv_no>. Always refer to this as +C<&PL_sv_yes>. + + SV PL_sv_yes + +=item POPi + +Pops an integer off the stack. + + IV POPi + +=item POPl + +Pops a long off the stack. + + long POPl + +=item POPn + +Pops a double off the stack. + + NV POPn + +=item POPp + +Pops a string off the stack. + + char* POPp + +=item POPs + +Pops an SV off the stack. + + SV* POPs + +=item PUSHi + +Push an integer onto the stack. The stack must have room for this element. +Handles 'set' magic. See C<XPUSHi>. + + void PUSHi(IV iv) + +=item PUSHMARK + +Opening bracket for arguments on a callback. See C<PUTBACK> and +L<perlcall>. + + PUSHMARK; + +=item PUSHn + +Push a double onto the stack. The stack must have room for this element. +Handles 'set' magic. See C<XPUSHn>. + + void PUSHn(NV nv) + +=item PUSHp + +Push a string onto the stack. The stack must have room for this element. +The C<len> indicates the length of the string. Handles 'set' magic. See +C<XPUSHp>. + + void PUSHp(char* str, STRLEN len) + +=item PUSHs + +Push an SV onto the stack. The stack must have room for this element. +Does not handle 'set' magic. See C<XPUSHs>. + + void PUSHs(SV* sv) + +=item PUSHu + +Push an unsigned integer onto the stack. The stack must have room for this +element. See C<XPUSHu>. + + void PUSHu(UV uv) + +=item PUTBACK + +Closing bracket for XSUB arguments. This is usually handled by C<xsubpp>. +See C<PUSHMARK> and L<perlcall> for other uses. + + PUTBACK; + +=item Renew + +The XSUB-writer's interface to the C C<realloc> function. + + void Renew(void* ptr, int nitems, type) + +=item Renewc + +The XSUB-writer's interface to the C C<realloc> function, with +cast. + + void Renewc(void* ptr, int nitems, type, cast) + +=item require_pv + +Tells Perl to C<require> a module. + +NOTE: the perl_ form of this function is deprecated. 
+ + void require_pv(const char* pv) + +=item RETVAL + +Variable which is setup by C<xsubpp> to hold the return value for an +XSUB. This is always the proper type for the XSUB. See +L<perlxs/"The RETVAL Variable">. + + (whatever) RETVAL + +=item Safefree + +The XSUB-writer's interface to the C C<free> function. + + void Safefree(void* ptr) + +=item savepv + +Copy a string to a safe spot. This does not use an SV. + + char* savepv(const char* sv) + +=item savepvn + +Copy a string to a safe spot. The C<len> indicates number of bytes to +copy. This does not use an SV. + + char* savepvn(const char* sv, I32 len) + +=item SAVETMPS + +Opening bracket for temporaries on a callback. See C<FREETMPS> and +L<perlcall>. + + SAVETMPS; + +=item SP + +Stack pointer. This is usually handled by C<xsubpp>. See C<dSP> and +C<SPAGAIN>. + +=item SPAGAIN + +Refetch the stack pointer. Used after a callback. See L<perlcall>. + + SPAGAIN; + +=item ST + +Used to access elements on the XSUB's stack. + + SV* ST(int ix) + +=item strEQ + +Test two strings to see if they are equal. Returns true or false. + + bool strEQ(char* s1, char* s2) + +=item strGE + +Test two strings to see if the first, C<s1>, is greater than or equal to +the second, C<s2>. Returns true or false. + + bool strGE(char* s1, char* s2) + +=item strGT + +Test two strings to see if the first, C<s1>, is greater than the second, +C<s2>. Returns true or false. + + bool strGT(char* s1, char* s2) + +=item strLE + +Test two strings to see if the first, C<s1>, is less than or equal to the +second, C<s2>. Returns true or false. + + bool strLE(char* s1, char* s2) + +=item strLT + +Test two strings to see if the first, C<s1>, is less than the second, +C<s2>. Returns true or false. + + bool strLT(char* s1, char* s2) + +=item strNE + +Test two strings to see if they are different. Returns true or +false. + + bool strNE(char* s1, char* s2) + +=item strnEQ + +Test two strings to see if they are equal.
The C<len> parameter indicates +the number of bytes to compare. Returns true or false. (A wrapper for +C<strncmp>). + + bool strnEQ(char* s1, char* s2, STRLEN len) + +=item strnNE + +Test two strings to see if they are different. The C<len> parameter +indicates the number of bytes to compare. Returns true or false. (A +wrapper for C<strncmp>). + + bool strnNE(char* s1, char* s2, STRLEN len) + +=item StructCopy + +This is an architecture-independent macro to copy one structure to another. + + void StructCopy(type src, type dest, type) + +=item SvCUR + +Returns the length of the string which is in the SV. See C<SvLEN>. + + STRLEN SvCUR(SV* sv) + +=item SvCUR_set + +Set the length of the string which is in the SV. See C<SvCUR>. + + void SvCUR_set(SV* sv, STRLEN len) + +=item SvEND + +Returns a pointer to the last character in the string which is in the SV. +See C<SvCUR>. Access the character as *(SvEND(sv)). + + char* SvEND(SV* sv) + +=item SvGETMAGIC + +Invokes C<mg_get> on an SV if it has 'get' magic. This macro evaluates its +argument more than once. + + void SvGETMAGIC(SV* sv) + +=item SvGROW + +Expands the character buffer in the SV so that it has room for the +indicated number of bytes (remember to reserve space for an extra trailing +NUL character). Calls C<sv_grow> to perform the expansion if necessary. +Returns a pointer to the character buffer. + + char* SvGROW(SV* sv, STRLEN len) + +=item SvIOK + +Returns a boolean indicating whether the SV contains an integer. + + bool SvIOK(SV* sv) + +=item SvIOKp + +Returns a boolean indicating whether the SV contains an integer. Checks +the B<private> setting. Use C<SvIOK>. + + bool SvIOKp(SV* sv) + +=item SvIOK_off + +Unsets the IV status of an SV. + + void SvIOK_off(SV* sv) + +=item SvIOK_on + +Tells an SV that it is an integer. + + void SvIOK_on(SV* sv) + +=item SvIOK_only + +Tells an SV that it is an integer and disables all other OK bits.
+ + void SvIOK_only(SV* sv) + +=item SvIV + +Coerces the given SV to an integer and returns it. + + IV SvIV(SV* sv) + +=item SvIVX + +Returns the integer which is stored in the SV, assuming SvIOK is +true. + + IV SvIVX(SV* sv) + +=item SvLEN + +Returns the size of the string buffer in the SV. See C<SvCUR>. + + STRLEN SvLEN(SV* sv) + +=item SvNIOK + +Returns a boolean indicating whether the SV contains a number, integer or +double. + + bool SvNIOK(SV* sv) + +=item SvNIOKp + +Returns a boolean indicating whether the SV contains a number, integer or +double. Checks the B<private> setting. Use C<SvNIOK>. + + bool SvNIOKp(SV* sv) + +=item SvNIOK_off + +Unsets the NV/IV status of an SV. + + void SvNIOK_off(SV* sv) + +=item SvNOK + +Returns a boolean indicating whether the SV contains a double. + + bool SvNOK(SV* sv) + +=item SvNOKp + +Returns a boolean indicating whether the SV contains a double. Checks the +B<private> setting. Use C<SvNOK>. + + bool SvNOKp(SV* sv) + +=item SvNOK_off + +Unsets the NV status of an SV. + + void SvNOK_off(SV* sv) + +=item SvNOK_on + +Tells an SV that it is a double. + + void SvNOK_on(SV* sv) + +=item SvNOK_only + +Tells an SV that it is a double and disables all other OK bits. + + void SvNOK_only(SV* sv) + +=item SvNV + +Coerce the given SV to a double and return it. + + NV SvNV(SV* sv) + +=item SvNVX + +Returns the double which is stored in the SV, assuming SvNOK is +true. + + NV SvNVX(SV* sv) + +=item SvOK + +Returns a boolean indicating whether the value is an SV. + + bool SvOK(SV* sv) + +=item SvOOK + +Returns a boolean indicating whether the SvIVX is a valid offset value for +the SvPVX. This hack is used internally to speed up removal of characters +from the beginning of a SvPV. When SvOOK is true, then the start of the +allocated string buffer is really (SvPVX - SvIVX). + + bool SvOOK(SV* sv) + +=item SvPOK + +Returns a boolean indicating whether the SV contains a character +string. 
+ + bool SvPOK(SV* sv) + +=item SvPOKp + +Returns a boolean indicating whether the SV contains a character string. +Checks the B<private> setting. Use C<SvPOK>. + + bool SvPOKp(SV* sv) + +=item SvPOK_off + +Unsets the PV status of an SV. + + void SvPOK_off(SV* sv) + +=item SvPOK_on + +Tells an SV that it is a string. + + void SvPOK_on(SV* sv) + +=item SvPOK_only + +Tells an SV that it is a string and disables all other OK bits. + + void SvPOK_only(SV* sv) + +=item SvPV + +Returns a pointer to the string in the SV, or a stringified form of the SV +if the SV does not contain a string. Handles 'get' magic. + + char* SvPV(SV* sv, STRLEN len) + +=item SvPVX + +Returns a pointer to the string in the SV. The SV must contain a +string. + + char* SvPVX(SV* sv) + +=item SvPV_force + +Like C<SvPV> but will force the SV into becoming a string (SvPOK). You want +force if you are going to update the SvPVX directly. + + char* SvPV_force(SV* sv, STRLEN len) + +=item SvPV_nolen + +Returns a pointer to the string in the SV, or a stringified form of the SV +if the SV does not contain a string. Handles 'get' magic. + + char* SvPV_nolen(SV* sv) + +=item SvREFCNT + +Returns the value of the object's reference count. + + U32 SvREFCNT(SV* sv) + +=item SvREFCNT_dec + +Decrements the reference count of the given SV. + + void SvREFCNT_dec(SV* sv) + +=item SvREFCNT_inc + +Increments the reference count of the given SV. + + SV* SvREFCNT_inc(SV* sv) + +=item SvROK + +Tests if the SV is an RV. + + bool SvROK(SV* sv) + +=item SvROK_off + +Unsets the RV status of an SV. + + void SvROK_off(SV* sv) + +=item SvROK_on + +Tells an SV that it is an RV. + + void SvROK_on(SV* sv) + +=item SvRV + +Dereferences an RV to return the SV. + + SV* SvRV(SV* sv) + +=item SvSETMAGIC + +Invokes C<mg_set> on an SV if it has 'set' magic. This macro evaluates its +argument more than once. + + void SvSETMAGIC(SV* sv) + +=item SvSetSV + +Calls C<sv_setsv> if dsv is not the same as ssv.
May evaluate arguments +more than once. + + void SvSetSV(SV* dsv, SV* ssv) + +=item SvSetSV_nosteal + +Calls a non-destructive version of C<sv_setsv> if dsv is not the same as +ssv. May evaluate arguments more than once. + + void SvSetSV_nosteal(SV* dsv, SV* ssv) + +=item SvSTASH + +Returns the stash of the SV. + + HV* SvSTASH(SV* sv) + +=item SvTAINT + +Taints an SV if tainting is enabled. + + void SvTAINT(SV* sv) + +=item SvTAINTED + +Checks to see if an SV is tainted. Returns TRUE if it is, FALSE if +not. + + bool SvTAINTED(SV* sv) + +=item SvTAINTED_off + +Untaints an SV. Be I<very> careful with this routine, as it short-circuits +some of Perl's fundamental security features. XS module authors should not +use this function unless they fully understand all the implications of +unconditionally untainting the value. Untainting should be done in the +standard perl fashion, via a carefully crafted regexp, rather than directly +untainting variables. + + void SvTAINTED_off(SV* sv) + +=item SvTAINTED_on + +Marks an SV as tainted. + + void SvTAINTED_on(SV* sv) + +=item SvTRUE + +Returns a boolean indicating whether Perl would evaluate the SV as true or +false, defined or undefined. Does not handle 'get' magic. + + bool SvTRUE(SV* sv) + +=item SvTYPE + +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) + +=item svtype + +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. + +=item SVt_IV + +Integer type flag for scalars. See C<svtype>. + +=item SVt_NV + +Double type flag for scalars. See C<svtype>. + +=item SVt_PV + +Pointer type flag for scalars. See C<svtype>. + +=item SVt_PVAV + +Type flag for arrays. See C<svtype>. + +=item SVt_PVCV + +Type flag for code refs. See C<svtype>. + +=item SVt_PVHV + +Type flag for hashes. See C<svtype>. + +=item SVt_PVMG + +Type flag for blessed scalars. See C<svtype>. + +=item SvUPGRADE + +Used to upgrade an SV to a more complex form.
Uses C<sv_upgrade> to +perform the upgrade if necessary. See C<svtype>. + + void SvUPGRADE(SV* sv, svtype type) + +=item SvUV + +Coerces the given SV to an unsigned integer and returns it. + + UV SvUV(SV* sv) + +=item SvUVX + +Returns the unsigned integer which is stored in the SV, assuming SvIOK is +true. + + UV SvUVX(SV* sv) + +=item sv_2mortal + +Marks an SV as mortal. The SV will be destroyed when the current context +ends. + + SV* sv_2mortal(SV* sv) + +=item sv_bless + +Blesses an SV into a specified package. The SV must be an RV. The package +must be designated by its stash (see C<gv_stashpv()>). The reference count +of the SV is unaffected. + + SV* sv_bless(SV* sv, HV* stash) + +=item sv_catpv + +Concatenates the string onto the end of the string which is in the SV. +Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. + + void sv_catpv(SV* sv, const char* ptr) + +=item sv_catpvf + +Processes its arguments like C<sprintf> and appends the formatted output +to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must +typically be called after calling this function to handle 'set' magic. + + void sv_catpvf(SV* sv, const char* pat, ...) + +=item sv_catpvf_mg + +Like C<sv_catpvf>, but also handles 'set' magic. + + void sv_catpvf_mg(SV *sv, const char* pat, ...) + +=item sv_catpvn + +Concatenates the string onto the end of the string which is in the SV. The +C<len> indicates number of bytes to copy. Handles 'get' magic, but not +'set' magic. See C<sv_catpvn_mg>. + + void sv_catpvn(SV* sv, const char* ptr, STRLEN len) + +=item sv_catpvn_mg + +Like C<sv_catpvn>, but also handles 'set' magic. + + void sv_catpvn_mg(SV *sv, const char *ptr, STRLEN len) + +=item sv_catpv_mg + +Like C<sv_catpv>, but also handles 'set' magic. + + void sv_catpv_mg(SV *sv, const char *ptr) + +=item sv_catsv + +Concatenates the string from SV C<ssv> onto the end of the string in SV +C<dsv>. Handles 'get' magic, but not 'set' magic. See C<sv_catsv_mg>. 
+ + void sv_catsv(SV* dsv, SV* ssv) + +=item sv_catsv_mg + +Like C<sv_catsv>, but also handles 'set' magic. + + void sv_catsv_mg(SV *dstr, SV *sstr) + +=item sv_chop + +Efficient removal of characters from the beginning of the string buffer. +SvPOK(sv) must be true and the C<ptr> must be a pointer to somewhere inside +the string buffer. The C<ptr> becomes the first character of the adjusted +string. + + void sv_chop(SV* sv, char* ptr) + +=item sv_cmp + +Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the +string in C<sv1> is less than, equal to, or greater than the string in +C<sv2>. + + I32 sv_cmp(SV* sv1, SV* sv2) + +=item sv_dec + +Auto-decrement of the value in the SV. + + void sv_dec(SV* sv) + +=item sv_derived_from + +Returns a boolean indicating whether the SV is derived from the specified +class. This is the function that implements C<UNIVERSAL::isa>. It works +for class names as well as for objects. + + bool sv_derived_from(SV* sv, const char* name) + +=item sv_eq + +Returns a boolean indicating whether the strings in the two SVs are +identical. + + I32 sv_eq(SV* sv1, SV* sv2) + +=item sv_grow + +Expands the character buffer in the SV. This will use C<sv_unref> and will +upgrade the SV to C<SVt_PV>. Returns a pointer to the character buffer. +Use C<SvGROW>. + + char* sv_grow(SV* sv, STRLEN newlen) + +=item sv_inc + +Auto-increment of the value in the SV. + + void sv_inc(SV* sv) + +=item sv_insert + +Inserts a string at the specified offset/length within the SV. Similar to +the Perl substr() function. + + void sv_insert(SV* bigsv, STRLEN offset, STRLEN len, char* little, STRLEN littlelen) + +=item sv_isa + +Returns a boolean indicating whether the SV is blessed into the specified +class. This does not check for subtypes; use C<sv_derived_from> to verify +an inheritance relationship. + + int sv_isa(SV* sv, const char* name) + +=item sv_isobject + +Returns a boolean indicating whether the SV is an RV pointing to a blessed +object. 
If the SV is not an RV, or if the object is not blessed, then this +will return false. + + int sv_isobject(SV* sv) + +=item sv_len + +Returns the length of the string in the SV. See also C<SvCUR>. + + STRLEN sv_len(SV* sv) + +=item sv_magic + +Adds magic to an SV. + + void sv_magic(SV* sv, SV* obj, int how, const char* name, I32 namlen) + +=item sv_mortalcopy + +Creates a new SV which is a copy of the original SV. The new SV is marked +as mortal. + + SV* sv_mortalcopy(SV* oldsv) + +=item sv_newmortal + +Creates a new SV which is mortal. The reference count of the SV is set to 1. + + SV* sv_newmortal() + +=item sv_setiv + +Copies an integer into the given SV. Does not handle 'set' magic. See +C<sv_setiv_mg>. + + void sv_setiv(SV* sv, IV num) + +=item sv_setiv_mg + +Like C<sv_setiv>, but also handles 'set' magic. + + void sv_setiv_mg(SV *sv, IV i) + +=item sv_setnv + +Copies a double into the given SV. Does not handle 'set' magic. See +C<sv_setnv_mg>. + + void sv_setnv(SV* sv, NV num) + +=item sv_setnv_mg + +Like C<sv_setnv>, but also handles 'set' magic. + + void sv_setnv_mg(SV *sv, NV num) + +=item sv_setpv + +Copies a string into an SV. The string must be null-terminated. Does not +handle 'set' magic. See C<sv_setpv_mg>. + + void sv_setpv(SV* sv, const char* ptr) + +=item sv_setpvf + +Processes its arguments like C<sprintf> and sets an SV to the formatted +output. Does not handle 'set' magic. See C<sv_setpvf_mg>. + + void sv_setpvf(SV* sv, const char* pat, ...) + +=item sv_setpvf_mg + +Like C<sv_setpvf>, but also handles 'set' magic. + + void sv_setpvf_mg(SV *sv, const char* pat, ...) + +=item sv_setpviv + +Copies an integer into the given SV, also updating its string value. +Does not handle 'set' magic. See C<sv_setpviv_mg>. + + void sv_setpviv(SV* sv, IV num) + +=item sv_setpviv_mg + +Like C<sv_setpviv>, but also handles 'set' magic. + + void sv_setpviv_mg(SV *sv, IV iv) + +=item sv_setpvn + +Copies a string into an SV. 
The C<len> parameter indicates the number of +bytes to be copied. Does not handle 'set' magic. See C<sv_setpvn_mg>. + + void sv_setpvn(SV* sv, const char* ptr, STRLEN len) + +=item sv_setpvn_mg + +Like C<sv_setpvn>, but also handles 'set' magic. + + void sv_setpvn_mg(SV *sv, const char *ptr, STRLEN len) + +=item sv_setpv_mg + +Like C<sv_setpv>, but also handles 'set' magic. + + void sv_setpv_mg(SV *sv, const char *ptr) + +=item sv_setref_iv + +Copies an integer into a new SV, optionally blessing the SV. The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + + SV* sv_setref_iv(SV* rv, const char* classname, IV iv) + +=item sv_setref_nv + +Copies a double into a new SV, optionally blessing the SV. The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + + SV* sv_setref_nv(SV* rv, const char* classname, NV nv) + +=item sv_setref_pv + +Copies a pointer into a new SV, optionally blessing the SV. The C<rv> +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. If the C<pv> argument is NULL then C<PL_sv_undef> will be placed +into the SV. The C<classname> argument indicates the package for the +blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + +Do not use with other Perl types such as HV, AV, SV, CV, because those +objects will become corrupted by the pointer copy process. + +Note that C<sv_setref_pvn> copies the string while this copies the pointer. 
+ + SV* sv_setref_pv(SV* rv, const char* classname, void* pv) + +=item sv_setref_pvn + +Copies a string into a new SV, optionally blessing the SV. The length of the +string must be specified with C<n>. The C<rv> argument will be upgraded to +an RV. That RV will be modified to point to the new SV. The C<classname> +argument indicates the package for the blessing. Set C<classname> to +C<Nullch> to avoid the blessing. The new SV will be returned and will have +a reference count of 1. + +Note that C<sv_setref_pv> copies the pointer while this copies the string. + + SV* sv_setref_pvn(SV* rv, const char* classname, char* pv, STRLEN n) + +=item sv_setsv + +Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. +The source SV may be destroyed if it is mortal. Does not handle 'set' +magic. See the macro forms C<SvSetSV>, C<SvSetSV_nosteal> and +C<sv_setsv_mg>. + + void sv_setsv(SV* dsv, SV* ssv) + +=item sv_setsv_mg + +Like C<sv_setsv>, but also handles 'set' magic. + + void sv_setsv_mg(SV *dstr, SV *sstr) + +=item sv_setuv + +Copies an unsigned integer into the given SV. Does not handle 'set' magic. +See C<sv_setuv_mg>. + + void sv_setuv(SV* sv, UV num) + +=item sv_setuv_mg + +Like C<sv_setuv>, but also handles 'set' magic. + + void sv_setuv_mg(SV *sv, UV u) + +=item sv_unref + +Unsets the RV status of the SV, and decrements the reference count of +whatever was being referenced by the RV. This can almost be thought of +as a reversal of C<newSVrv>. See C<SvROK_off>. + + void sv_unref(SV* sv) + +=item sv_upgrade + +Upgrade an SV to a more complex form. Use C<SvUPGRADE>. See +C<svtype>. + + bool sv_upgrade(SV* sv, U32 mt) + +=item sv_usepvn + +Tells an SV to use C<ptr> to find its string value. Normally the string is +stored inside the SV but sv_usepvn allows the SV to use an outside string. +The C<ptr> should point to memory that was allocated by C<malloc>. The +string length, C<len>, must be supplied. 
This function will realloc the +memory pointed to by C<ptr>, so that pointer should not be freed or used by +the programmer after giving it to sv_usepvn. Does not handle 'set' magic. +See C<sv_usepvn_mg>. + + void sv_usepvn(SV* sv, char* ptr, STRLEN len) + +=item sv_usepvn_mg + +Like C<sv_usepvn>, but also handles 'set' magic. + + void sv_usepvn_mg(SV *sv, char *ptr, STRLEN len) + +=item sv_vcatpvfn + +Processes its arguments like C<vsprintf> and appends the formatted output +to an SV. Uses an array of SVs if the C style variable argument list is +missing (NULL). When running with taint checks enabled, indicates via +C<maybe_tainted> if results are untrustworthy (often due to the use of +locales). + + void sv_vcatpvfn(SV* sv, const char* pat, STRLEN patlen, va_list* args, SV** svargs, I32 svmax, bool *maybe_tainted) + +=item sv_vsetpvfn + +Works like C<sv_vcatpvfn> but copies the text into the SV instead of +appending it. + + void sv_vsetpvfn(SV* sv, const char* pat, STRLEN patlen, va_list* args, SV** svargs, I32 svmax, bool *maybe_tainted) + +=item THIS + +Variable which is setup by C<xsubpp> to designate the object in a C++ +XSUB. This is always the proper type for the C++ object. See C<CLASS> and +L<perlxs/"Using XS With C++">. + + (whatever) THIS + +=item toLOWER + +Converts the specified character to lowercase. + + char toLOWER(char ch) + +=item toUPPER + +Converts the specified character to uppercase. + + char toUPPER(char ch) + +=item warn + +This is the XSUB-writer's interface to Perl's C<warn> function. Use this +function the same way you use the C C<printf> function. See +C<croak>. + + void warn(const char* pat, ...) + +=item XPUSHi + +Push an integer onto the stack, extending the stack if necessary. Handles +'set' magic. See C<PUSHi>. + + void XPUSHi(IV iv) + +=item XPUSHn + +Push a double onto the stack, extending the stack if necessary. Handles +'set' magic. See C<PUSHn>.
+ + void XPUSHn(NV nv) + +=item XPUSHp + +Push a string onto the stack, extending the stack if necessary. The C<len> +indicates the length of the string. Handles 'set' magic. See +C<PUSHp>. + + void XPUSHp(char* str, STRLEN len) + +=item XPUSHs + +Push an SV onto the stack, extending the stack if necessary. Does not +handle 'set' magic. See C<PUSHs>. + + void XPUSHs(SV* sv) + +=item XPUSHu + +Push an unsigned integer onto the stack, extending the stack if necessary. +See C<PUSHu>. + + void XPUSHu(UV uv) + +=item XS + +Macro to declare an XSUB and its C parameter list. This is handled by +C<xsubpp>. + +=item XSRETURN + +Return from XSUB, indicating number of items on the stack. This is usually +handled by C<xsubpp>. + + void XSRETURN(int nitems) + +=item XSRETURN_EMPTY + +Return an empty list from an XSUB immediately. + + XSRETURN_EMPTY; + +=item XSRETURN_IV + +Return an integer from an XSUB immediately. Uses C<XST_mIV>. + + void XSRETURN_IV(IV iv) + +=item XSRETURN_NO + +Return C<&PL_sv_no> from an XSUB immediately. Uses C<XST_mNO>. + + XSRETURN_NO; + +=item XSRETURN_NV + +Return a double from an XSUB immediately. Uses C<XST_mNV>. + + void XSRETURN_NV(NV nv) + +=item XSRETURN_PV + +Return a copy of a string from an XSUB immediately. Uses C<XST_mPV>. + + void XSRETURN_PV(char* str) + +=item XSRETURN_UNDEF + +Return C<&PL_sv_undef> from an XSUB immediately. Uses C<XST_mUNDEF>. + + XSRETURN_UNDEF; + +=item XSRETURN_YES + +Return C<&PL_sv_yes> from an XSUB immediately. Uses C<XST_mYES>. + + XSRETURN_YES; + +=item XST_mIV + +Place an integer into the specified position C<pos> on the stack. The +value is stored in a new mortal SV. + + void XST_mIV(int pos, IV iv) + +=item XST_mNO + +Place C<&PL_sv_no> into the specified position C<pos> on the +stack. + + void XST_mNO(int pos) + +=item XST_mNV + +Place a double into the specified position C<pos> on the stack. The value +is stored in a new mortal SV.
+ + void XST_mNV(int pos, NV nv) + +=item XST_mPV + +Place a copy of a string into the specified position C<pos> on the stack. +The value is stored in a new mortal SV. + + void XST_mPV(int pos, char* str) + +=item XST_mUNDEF + +Place C<&PL_sv_undef> into the specified position C<pos> on the +stack. + + void XST_mUNDEF(int pos) + +=item XST_mYES + +Place C<&PL_sv_yes> into the specified position C<pos> on the +stack. + + void XST_mYES(int pos) + +=item XS_VERSION + +The version identifier for an XS module. This is usually +handled automatically by C<ExtUtils::MakeMaker>. See C<XS_VERSION_BOOTCHECK>. + +=item XS_VERSION_BOOTCHECK + +Macro to verify that a PM module's $VERSION variable matches the XS +module's C<XS_VERSION> variable. This is usually handled automatically by +C<xsubpp>. See L<perlxs/"The VERSIONCHECK: Keyword">. + + XS_VERSION_BOOTCHECK; + +=item Zero + +The XSUB-writer's interface to the C C<memzero> function. The C<dest> is the +destination, C<nitems> is the number of items, and C<type> is the type. + + void Zero(void* dest, int nitems, type) + +=back + +=head1 AUTHORS + +Until May 1997, this document was maintained by Jeff Okamoto +<okamoto@corp.hp.com>. It is now maintained as part of Perl itself. + +With lots of help and suggestions from Dean Roehrich, Malcolm Beattie, +Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil +Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer, +Stephen McCamant, and Gurusamy Sarathy. + +API Listing originally by Dean Roehrich <roehrich@cray.com>. + +Updated to be autogenerated from comments in the source by Benjamin Stuhl. 
+ +=head1 SEE ALSO + +perlguts(1), perlxs(1), perlxstut(1), perlintern(1) + diff --git a/contrib/perl5/pod/perlbook.pod b/contrib/perl5/pod/perlbook.pod index 76763cd..3a693dd 100644 --- a/contrib/perl5/pod/perlbook.pod +++ b/contrib/perl5/pod/perlbook.pod @@ -13,4 +13,4 @@ If you're web-connected, you can even mosey on over to http://www.ora.com/ for an online order form. Other Perl books from various publishers and authors -can be found listed in L<perlfaq3>. +can be found listed in L<perlfaq2>. diff --git a/contrib/perl5/pod/perlboot.pod b/contrib/perl5/pod/perlboot.pod new file mode 100644 index 0000000..b549f45 --- /dev/null +++ b/contrib/perl5/pod/perlboot.pod @@ -0,0 +1,811 @@ +=head1 NAME + +perlboot - Beginner's Object-Oriented Tutorial + +=head1 DESCRIPTION + +If you're not familiar with objects from other languages, some of the +other Perl object documentation may be a little daunting, such as +L<perlobj>, a basic reference in using objects, and L<perltoot>, which +introduces readers to the peculiarities of Perl's object system in a +tutorial way. + +So, let's take a different approach, presuming no prior object +experience. It helps if you know about subroutines (L<perlsub>), +references (L<perlref> et. seq.), and packages (L<perlmod>), so become +familiar with those first if you haven't already. + +=head2 If we could talk to the animals... + +Let's let the animals talk for a moment: + + sub Cow::speak { + print "a Cow goes moooo!\n"; + } + sub Horse::speak { + print "a Horse goes neigh!\n"; + } + sub Sheep::speak { + print "a Sheep goes baaaah!\n" + } + + Cow::speak; + Horse::speak; + Sheep::speak; + +This results in: + + a Cow goes moooo! + a Horse goes neigh! + a Sheep goes baaaah! + +Nothing spectacular here. Simple subroutines, albeit from separate +packages, and called using the full package name. 
So let's create +an entire pasture: + + # Cow::speak, Horse::speak, Sheep::speak as before + @pasture = qw(Cow Cow Horse Sheep Sheep); + foreach $animal (@pasture) { + &{$animal."::speak"}; + } + +This results in: + + a Cow goes moooo! + a Cow goes moooo! + a Horse goes neigh! + a Sheep goes baaaah! + a Sheep goes baaaah! + +Wow. That symbolic coderef de-referencing there is pretty nasty. +We're counting on C<no strict refs> mode, certainly not recommended +for larger programs. And why was that necessary? Because the name of +the package seems to be inseparable from the name of the subroutine we +want to invoke within that package. + +Or is it? + +=head2 Introducing the method invocation arrow + +For now, let's say that C<< Class->method >> invokes subroutine +C<method> in package C<Class>. (Here, "Class" is used in its +"category" meaning, not its "scholastic" meaning.) That's not +completely accurate, but we'll do this one step at a time. Now let's +use it like so: + + # Cow::speak, Horse::speak, Sheep::speak as before + Cow->speak; + Horse->speak; + Sheep->speak; + +And once again, this results in: + + a Cow goes moooo! + a Horse goes neigh! + a Sheep goes baaaah! + +That's not fun yet. Same number of characters, all constant, no +variables. But yet, the parts are separable now. Watch: + + $a = "Cow"; + $a->speak; # invokes Cow->speak + +Ahh! Now that the package name has been parted from the subroutine +name, we can use a variable package name. And this time, we've got +something that works even when C<use strict refs> is enabled. + +=head2 Invoking a barnyard + +Let's take that new arrow invocation and put it back in the barnyard +example: + + sub Cow::speak { + print "a Cow goes moooo!\n"; + } + sub Horse::speak { + print "a Horse goes neigh!\n"; + } + sub Sheep::speak { + print "a Sheep goes baaaah!\n" + } + + @pasture = qw(Cow Cow Horse Sheep Sheep); + foreach $animal (@pasture) { + $animal->speak; + } + +There!
Now we have the animals all talking, and safely at that, +without the use of symbolic coderefs. + +But look at all that common code. Each of the C<speak> routines has a +similar structure: a C<print> operator and a string that contains +common text, except for two of the words. It'd be nice if we could +factor out the commonality, in case we decide later to change it all +to C<says> instead of C<goes>. + +And we actually have a way of doing that without much fuss, but we +have to hear a bit more about what the method invocation arrow is +actually doing for us. + +=head2 The extra parameter of method invocation + +The invocation of: + + Class->method(@args) + +attempts to invoke subroutine C<Class::method> as: + + Class::method("Class", @args); + +(If the subroutine can't be found, "inheritance" kicks in, but we'll +get to that later.) This means that we get the class name as the +first parameter (the only parameter, if no arguments are given). So +we can rewrite the C<Sheep> speaking subroutine as: + + sub Sheep::speak { + my $class = shift; + print "a $class goes baaaah!\n"; + } + +And the other two animals come out similarly: + + sub Cow::speak { + my $class = shift; + print "a $class goes moooo!\n"; + } + sub Horse::speak { + my $class = shift; + print "a $class goes neigh!\n"; + } + +In each case, C<$class> will get the value appropriate for that +subroutine. But once again, we have a lot of similar structure. Can +we factor that out even further? Yes, by calling another method in +the same class. + +=head2 Calling a second method to simplify things + +Let's call out from C<speak> to a helper method called C<sound>. +This method provides the constant text for the sound itself. + + { package Cow; + sub sound { "moooo" } + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n" + } + } + +Now, when we call C<< Cow->speak >>, we get a C<$class> of C<Cow> in +C<speak>. 
This in turn selects the C<< Cow->sound >> method, which +returns C<moooo>. But how different would this be for the C<Horse>? + + { package Horse; + sub sound { "neigh" } + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n" + } + } + +Only the name of the package and the specific sound change. So can we +somehow share the definition for C<speak> between the Cow and the +Horse? Yes, with inheritance! + +=head2 Inheriting the windpipes + +We'll define a common subroutine package called C<Animal>, with the +definition for C<speak>: + + { package Animal; + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n" + } + } + +Then, for each animal, we say it "inherits" from C<Animal>, along +with the animal-specific sound: + + { package Cow; + @ISA = qw(Animal); + sub sound { "moooo" } + } + +Note the added C<@ISA> array. We'll get to that in a minute. + +But what happens when we invoke C<< Cow->speak >> now? + +First, Perl constructs the argument list. In this case, it's just +C<Cow>. Then Perl looks for C<Cow::speak>. But that's not there, so +Perl checks for the inheritance array C<@Cow::ISA>. It's there, +and contains the single name C<Animal>. + +Perl next checks for C<speak> inside C<Animal> instead, as in +C<Animal::speak>. And that's found, so Perl invokes that subroutine +with the already frozen argument list. + +Inside the C<Animal::speak> subroutine, C<$class> becomes C<Cow> (the +first argument). So when we get to the step of invoking +C<< $class->sound >>, it'll be looking for C<< Cow->sound >>, which +gets it on the first try without looking at C<@ISA>. Success! + +=head2 A few notes about @ISA + +This magical C<@ISA> variable (pronounced "is a" not "ice-uh"), has +declared that C<Cow> "is a" C<Animal>. Note that it's an array, +not a simple single value, because on rare occasions, it makes sense +to have more than one parent class searched for the missing methods. 
+ +If C<Animal> also had an C<@ISA>, then we'd check there too. The +search is recursive, depth-first, left-to-right in each C<@ISA>. +Typically, each C<@ISA> has only one element (multiple elements means +multiple inheritance and multiple headaches), so we get a nice tree of +inheritance. + +When we turn on C<use strict>, we'll get complaints on C<@ISA>, since +it's not a variable containing an explicit package name, nor is it a +lexical ("my") variable. We can't make it a lexical variable though +(it has to belong to the package to be found by the inheritance mechanism), +so there's a couple of straightforward ways to handle that. + +The easiest is to just spell the package name out: + + @Cow::ISA = qw(Animal); + +Or allow it as an implicitly named package variable: + + package Cow; + use vars qw(@ISA); + @ISA = qw(Animal); + +If you're bringing in the class from outside, via an object-oriented +module, you change: + + package Cow; + use Animal; + use vars qw(@ISA); + @ISA = qw(Animal); + +into just: + + package Cow; + use base qw(Animal); + +And that's pretty darn compact. + +=head2 Overriding the methods + +Let's add a mouse, which can barely be heard: + + # Animal package from before + { package Mouse; + @ISA = qw(Animal); + sub sound { "squeak" } + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n"; + print "[but you can barely hear it!]\n"; + } + } + + Mouse->speak; + +which results in: + + a Mouse goes squeak! + [but you can barely hear it!] + +Here, C<Mouse> has its own speaking routine, so C<< Mouse->speak >> +doesn't immediately invoke C<< Animal->speak >>. This is known as +"overriding". In fact, we didn't even need to say that a C<Mouse> was +an C<Animal> at all, since all of the methods needed for C<speak> are +completely defined with C<Mouse>. + +But we've now duplicated some of the code from C<< Animal->speak >>, +and this can once again be a maintenance headache. So, can we avoid +that? 
Can we say somehow that a C<Mouse> does everything any other +C<Animal> does, but add in the extra comment? Sure! + +First, we can invoke the C<Animal::speak> method directly: + + # Animal package from before + { package Mouse; + @ISA = qw(Animal); + sub sound { "squeak" } + sub speak { + my $class = shift; + Animal::speak($class); + print "[but you can barely hear it!]\n"; + } + } + +Note that we have to include the C<$class> parameter (almost surely +the value of C<"Mouse">) as the first parameter to C<Animal::speak>, +since we've stopped using the method arrow. Why did we stop? Well, +if we invoke C<< Animal->speak >> there, the first parameter to the +method will be C<"Animal"> not C<"Mouse">, and when time comes for it +to call for the C<sound>, it won't have the right class to come back +to this package. + +Invoking C<Animal::speak> directly is a mess, however. What if +C<Animal::speak> didn't exist before, and was being inherited from a +class mentioned in C<@Animal::ISA>? Because we are no longer using +the method arrow, we get one and only one chance to hit the right +subroutine. + +Also note that the C<Animal> classname is now hardwired into the +subroutine selection. This is a mess if someone maintains the code, +changing C<@ISA> for C<Mouse> and doesn't notice C<Animal> there in +C<speak>. So, this is probably not the right way to go. + +=head2 Starting the search from a different place + +A better solution is to tell Perl to search from a higher place +in the inheritance chain: + + # same Animal as before + { package Mouse; + # same @ISA, &sound as before + sub speak { + my $class = shift; + $class->Animal::speak; + print "[but you can barely hear it!]\n"; + } + } + +Ahh. This works. Using this syntax, we start with C<Animal> to find +C<speak>, and use all of C<Animal>'s inheritance chain if not found +immediately.
And yet the first parameter will be C<$class>, so the +found C<speak> method will get C<Mouse> as its first entry, and +eventually work its way back to C<Mouse::sound> for the details. + +But this isn't the best solution. We still have to keep the C<@ISA> +and the initial search package coordinated. Worse, if C<Mouse> had +multiple entries in C<@ISA>, we wouldn't necessarily know which one +had actually defined C<speak>. So, is there an even better way? + +=head2 The SUPER way of doing things + +By changing the C<Animal> class to the C<SUPER> class in that +invocation, we get a search of all of our super classes (classes +listed in C<@ISA>) automatically: + + # same Animal as before + { package Mouse; + # same @ISA, &sound as before + sub speak { + my $class = shift; + $class->SUPER::speak; + print "[but you can barely hear it!]\n"; + } + } + +So, C<SUPER::speak> means look in the current package's C<@ISA> for +C<speak>, invoking the first one found. + +=head2 Where we're at so far... + +So far, we've seen the method arrow syntax: + + Class->method(@args); + +or the equivalent: + + $a = "Class"; + $a->method(@args); + +which constructs an argument list of: + + ("Class", @args) + +and attempts to invoke + + Class::method("Class", @args); + +However, if C<Class::method> is not found, then C<@Class::ISA> is examined +(recursively) to locate a package that does indeed contain C<method>, +and that subroutine is invoked instead. + +Using this simple syntax, we have class methods, (multiple) +inheritance, overriding, and extending. Using just what we've seen so +far, we've been able to factor out common code, and provide a nice way +to reuse implementations with variations. This is at the core of what +objects provide, but objects also provide instance data, which we +haven't even begun to cover. + +=head2 A horse is a horse, of course of course -- or is it?
+ +Let's start with the code for the C<Animal> class +and the C<Horse> class: + + { package Animal; + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n" + } + } + { package Horse; + @ISA = qw(Animal); + sub sound { "neigh" } + } + +This lets us invoke C<< Horse->speak >> to ripple upward to +C<Animal::speak>, calling back to C<Horse::sound> to get the specific +sound, and the output of: + + a Horse goes neigh! + +But all of our Horse objects would have to be absolutely identical. +If I add a subroutine, all horses automatically share it. That's +great for making horses the same, but how do we capture the +distinctions about an individual horse? For example, suppose I want +to give my first horse a name. There's got to be a way to keep its +name separate from the other horses. + +We can do that by drawing a new distinction, called an "instance". +An "instance" is generally created by a class. In Perl, any reference +can be an instance, so let's start with the simplest reference +that can hold a horse's name: a scalar reference. + + my $name = "Mr. Ed"; + my $talking = \$name; + +So now C<$talking> is a reference to what will be the instance-specific +data (the name). The final step in turning this into a real instance +is with a special operator called C<bless>: + + bless $talking, Horse; + +This operator stores information about the package named C<Horse> into +the thing pointed at by the reference. At this point, we say +C<$talking> is an instance of C<Horse>. That is, it's a specific +horse. The reference is otherwise unchanged, and can still be used +with traditional dereferencing operators. + +=head2 Invoking an instance method + +The method arrow can be used on instances, as well as names of +packages (classes). So, let's get the sound that C<$talking> makes: + + my $noise = $talking->sound; + +To invoke C<sound>, Perl first notes that C<$talking> is a blessed +reference (and thus an instance). 
It then constructs an argument +list, in this case from just C<($talking)>. (Later we'll see that +arguments will take their place following the instance variable, +just like with classes.) + +Now for the fun part: Perl takes the class in which the instance was +blessed, in this case C<Horse>, and uses that to locate the subroutine +to invoke the method. In this case, C<Horse::sound> is found directly +(without using inheritance), yielding the final subroutine invocation: + + Horse::sound($talking) + +Note that the first parameter here is still the instance, not the name +of the class as before. We'll get C<neigh> as the return value, and +that'll end up as the C<$noise> variable above. + +If Horse::sound had not been found, we'd be wandering up the +C<@Horse::ISA> list to try to find the method in one of the +superclasses, just as for a class method. The only difference between +a class method and an instance method is whether the first parameter +is an instance (a blessed reference) or a class name (a string). + +=head2 Accessing the instance data + +Because we get the instance as the first parameter, we can now access +the instance-specific data. In this case, let's add a way to get at +the name: + + { package Horse; + @ISA = qw(Animal); + sub sound { "neigh" } + sub name { + my $self = shift; + $$self; + } + } + +Now we call for the name: + + print $talking->name, " says ", $talking->sound, "\n"; + +Inside C<Horse::name>, the C<@_> array contains just C<$talking>, +which the C<shift> stores into C<$self>. (It's traditional to shift +the first parameter off into a variable named C<$self> for instance +methods, so stay with that unless you have strong reasons otherwise.) +Then, C<$self> gets de-referenced as a scalar ref, yielding C<Mr. Ed>, +and we're done with that. The result is: + + Mr. Ed says neigh. + +=head2 How to build a horse + +Of course, if we constructed all of our horses by hand, we'd most +likely make mistakes from time to time. 
We're also violating one of +the properties of object-oriented programming, in that the "inside +guts" of a Horse are visible. That's good if you're a veterinarian, +but not if you just like to own horses. So, let's let the Horse class +build a new horse: + + { package Horse; + @ISA = qw(Animal); + sub sound { "neigh" } + sub name { + my $self = shift; + $$self; + } + sub named { + my $class = shift; + my $name = shift; + bless \$name, $class; + } + } + +Now with the new C<named> method, we can build a horse: + + my $talking = Horse->named("Mr. Ed"); + +Notice we're back to a class method, so the two arguments to +C<Horse::named> are C<Horse> and C<Mr. Ed>. The C<bless> operator +not only blesses C<$name>, it also returns the reference to C<$name>, +so that's fine as a return value. And that's how to build a horse. + +We've called the constructor C<named> here, so that it quickly denotes +the constructor's argument as the name for this particular C<Horse>. +You can use different constructors with different names for different +ways of "giving birth" to the object (like maybe recording its +pedigree or date of birth). However, you'll find that most people +coming to Perl from more limited languages use a single constructor +named C<new>, with various ways of interpreting the arguments to +C<new>. Either style is fine, as long as you document your particular +way of giving birth to an object. (And you I<were> going to do that, +right?) + +=head2 Inheriting the constructor + +But was there anything specific to C<Horse> in that method? No. 
Therefore, +it's also the same recipe for building anything else that inherited from +C<Animal>, so let's put it there: + + { package Animal; + sub speak { + my $class = shift; + print "a $class goes ", $class->sound, "!\n" + } + sub name { + my $self = shift; + $$self; + } + sub named { + my $class = shift; + my $name = shift; + bless \$name, $class; + } + } + { package Horse; + @ISA = qw(Animal); + sub sound { "neigh" } + } + +Ahh, but what happens if we invoke C<speak> on an instance? + + my $talking = Horse->named("Mr. Ed"); + $talking->speak; + +We get a debugging value: + + a Horse=SCALAR(0xaca42ac) goes neigh! + +Why? Because the C<Animal::speak> routine is expecting a classname as +its first parameter, not an instance. When the instance is passed in, +we'll end up using a blessed scalar reference as a string, and that +shows up as we saw it just now. + +=head2 Making a method work with either classes or instances + +All we need is for a method to detect if it is being called on a class +or called on an instance. The most straightforward way is with the +C<ref> operator. This returns a string (the classname) when used on a +blessed reference, and an empty string when used on a string (like a +classname). Let's modify the C<name> method first to notice the change: + + sub name { + my $either = shift; + ref $either + ? $$either # it's an instance, return name + : "an unnamed $either"; # it's a class, return generic + } + +Here, the C<?:> operator comes in handy to select either the +dereference or a derived string. Now we can use this with either an +instance or a class. Note that I've changed the first parameter +holder to C<$either> to show that this is intended: + + my $talking = Horse->named("Mr.
Ed"); + print Horse->name, "\n"; # prints "an unnamed Horse\n" + print $talking->name, "\n"; # prints "Mr. Ed\n" + +and now we'll fix C<speak> to use this: + + sub speak { + my $either = shift; + print $either->name, " goes ", $either->sound, "\n"; + } + +And since C<sound> already worked with either a class or an instance, +we're done! + +=head2 Adding parameters to a method + +Let's train our animals to eat: + + { package Animal; + sub named { + my $class = shift; + my $name = shift; + bless \$name, $class; + } + sub name { + my $either = shift; + ref $either + ? $$either # it's an instance, return name + : "an unnamed $either"; # it's a class, return generic + } + sub speak { + my $either = shift; + print $either->name, " goes ", $either->sound, "\n"; + } + sub eat { + my $either = shift; + my $food = shift; + print $either->name, " eats $food.\n"; + } + } + { package Horse; + @ISA = qw(Animal); + sub sound { "neigh" } + } + { package Sheep; + @ISA = qw(Animal); + sub sound { "baaaah" } + } + +And now try it out: + + my $talking = Horse->named("Mr. Ed"); + $talking->eat("hay"); + Sheep->eat("grass"); + +which prints: + + Mr. Ed eats hay. + an unnamed Sheep eats grass. + +An instance method with parameters gets invoked with the instance, +and then the list of parameters. So that first invocation is like: + + Animal::eat($talking, "hay"); + +=head2 More interesting instances + +What if an instance needs more data? Most interesting instances are +made of many items, each of which can in turn be a reference or even +another object. The easiest way to store these is often in a hash. +The keys of the hash serve as the names of parts of the object (often +called "instance variables" or "member variables"), and the +corresponding values are, well, the values. + +But how do we turn the horse into a hash? Recall that an object was +any blessed reference.
We can just as easily make it a blessed hash +reference as a blessed scalar reference, as long as everything that +looks at the reference is changed accordingly. + +Let's make a sheep that has a name and a color: + + my $bad = bless { Name => "Evil", Color => "black" }, Sheep; + +so C<< $bad->{Name} >> has C<Evil>, and C<< $bad->{Color} >> has +C<black>. But we want to make C<< $bad->name >> access the name, and +that's now messed up because it's expecting a scalar reference. Not +to worry, because that's pretty easy to fix up: + + ## in Animal + sub name { + my $either = shift; + ref $either ? + $either->{Name} : + "an unnamed $either"; + } + +And of course C<named> still builds a scalar sheep, so let's fix that +as well: + + ## in Animal + sub named { + my $class = shift; + my $name = shift; + my $self = { Name => $name, Color => $class->default_color }; + bless $self, $class; + } + +What's this C<default_color>? Well, if C<named> has only the name, +we still need to set a color, so we'll have a class-specific initial color. +For a sheep, we might define it as white: + + ## in Sheep + sub default_color { "white" } + +And then to keep from having to define one for each additional class, +we'll define a "backstop" method that serves as the "default default", +directly in C<Animal>: + + ## in Animal + sub default_color { "brown" } + +Now, because C<name> and C<named> were the only methods that +referenced the "structure" of the object, the rest of the methods can +remain the same, so C<speak> still works as before. + +=head2 A horse of a different color + +But having all our horses be brown would be boring. So let's add a +method or two to get and set the color. + + ## in Animal + sub color { + $_[0]->{Color} + } + sub set_color { + $_[0]->{Color} = $_[1]; + } + +Note the alternate way of accessing the arguments: C<$_[0]> is used +in-place, rather than with a C<shift>. (This saves us a bit of time +for something that may be invoked frequently.) 
And now we can fix +that color for Mr. Ed: + + my $talking = Horse->named("Mr. Ed"); + $talking->set_color("black-and-white"); + print $talking->name, " is colored ", $talking->color, "\n"; + +which results in: + + Mr. Ed is colored black-and-white + +=head2 Summary + +So, now we have class methods, constructors, instance methods, +instance data, and even accessors. But that's still just the +beginning of what Perl has to offer. We haven't even begun to talk +about accessors that double as getters and setters, destructors, +indirect object notation, subclasses that add instance data, per-class +data, overloading, "isa" and "can" tests, C<UNIVERSAL> class, and so +on. That's for the rest of the Perl documentation to cover. +Hopefully, this gets you started, though. + +=head1 SEE ALSO + +For more information, see L<perlobj> (for all the gritty details about +Perl objects, now that you've seen the basics), L<perltoot> (the +tutorial for those who already know objects), L<perlbot> (for some +more tricks), and books such as Damian Conway's excellent I<Object +Oriented Perl>. + +=head1 COPYRIGHT + +Copyright (c) 1999, 2000 by Randal L. Schwartz and Stonehenge +Consulting Services, Inc. Permission is hereby granted to distribute +this document intact with the Perl distribution, and in accordance +with the licenses of the Perl distribution; derived documents must +include this copyright notice intact. + +Portions of this text have been derived from Perl Training materials +originally appearing in the I<Packages, References, Objects, and +Modules> course taught by instructors for Stonehenge Consulting +Services, Inc. and used with permission. + +Portions of this text have been derived from materials originally +appearing in I<Linux Magazine> and used with permission. 
diff --git a/contrib/perl5/pod/perlcall.pod b/contrib/perl5/pod/perlcall.pod index 2b83780..148b24b 100644 --- a/contrib/perl5/pod/perlcall.pod +++ b/contrib/perl5/pod/perlcall.pod @@ -45,7 +45,7 @@ Before you launch yourself head first into the rest of this document, it would be a good idea to have read the following two documents - L<perlxs> and L<perlguts>. -=head1 THE PERL_CALL FUNCTIONS +=head1 THE CALL_ FUNCTIONS Although this stuff is easier to explain using examples, you first need be aware of a few important definitions. @@ -53,17 +53,17 @@ be aware of a few important definitions. Perl has a number of C functions that allow you to call Perl subroutines. They are - I32 perl_call_sv(SV* sv, I32 flags) ; - I32 perl_call_pv(char *subname, I32 flags) ; - I32 perl_call_method(char *methname, I32 flags) ; - I32 perl_call_argv(char *subname, I32 flags, register char **argv) ; + I32 call_sv(SV* sv, I32 flags) ; + I32 call_pv(char *subname, I32 flags) ; + I32 call_method(char *methname, I32 flags) ; + I32 call_argv(char *subname, I32 flags, register char **argv) ; -The key function is I<perl_call_sv>. All the other functions are +The key function is I<call_sv>. All the other functions are fairly simple wrappers which make it easier to call Perl subroutines in -special cases. At the end of the day they will all call I<perl_call_sv> +special cases. At the end of the day they will all call I<call_sv> to invoke the Perl subroutine. -All the I<perl_call_*> functions have a C<flags> parameter which is +All the I<call_*> functions have a C<flags> parameter which is used to pass a bit mask of options to Perl. This bit mask operates identically for each of the functions. The settings available in the bit mask are discussed in L<FLAG VALUES>. @@ -72,40 +72,40 @@ Each of the functions will now be discussed in turn. =over 5 -=item perl_call_sv +=item call_sv -I<perl_call_sv> takes two parameters, the first, C<sv>, is an SV*. 
+I<call_sv> takes two parameters, the first, C<sv>, is an SV*. This allows you to specify the Perl subroutine to be called either as a C string (which has first been converted to an SV) or a reference to a -subroutine. The section, I<Using perl_call_sv>, shows how you can make -use of I<perl_call_sv>. +subroutine. The section, I<Using call_sv>, shows how you can make +use of I<call_sv>. -=item perl_call_pv +=item call_pv -The function, I<perl_call_pv>, is similar to I<perl_call_sv> except it +The function, I<call_pv>, is similar to I<call_sv> except it expects its first parameter to be a C char* which identifies the Perl -subroutine you want to call, e.g., C<perl_call_pv("fred", 0)>. If the +subroutine you want to call, e.g., C<call_pv("fred", 0)>. If the subroutine you want to call is in another package, just include the package name in the string, e.g., C<"pkg::fred">. -=item perl_call_method +=item call_method -The function I<perl_call_method> is used to call a method from a Perl +The function I<call_method> is used to call a method from a Perl class. The parameter C<methname> corresponds to the name of the method to be called. Note that the class that the method belongs to is passed on the Perl stack rather than in the parameter list. This class can be either the name of the class (for a static method) or a reference to an object (for a virtual method). See L<perlobj> for more information on -static and virtual methods and L<Using perl_call_method> for an example -of using I<perl_call_method>. +static and virtual methods and L<Using call_method> for an example +of using I<call_method>. -=item perl_call_argv +=item call_argv -I<perl_call_argv> calls the Perl subroutine specified by the C string +I<call_argv> calls the Perl subroutine specified by the C string stored in the C<subname> parameter. It also takes the usual C<flags> parameter. 
The final parameter, C<argv>, consists of a NULL terminated list of C strings to be passed as parameters to the Perl subroutine. -See I<Using perl_call_argv>. +See I<Using call_argv>. =back @@ -116,12 +116,12 @@ subroutine are stored on the Perl stack. As a general rule you should I<always> check the return value from these functions. Even if you are expecting only a particular number of values to be returned from the Perl subroutine, there is nothing to -stop someone from doing something unexpected - don't say you haven't +stop someone from doing something unexpected--don't say you haven't been warned. =head1 FLAG VALUES -The C<flags> parameter in all the I<perl_call_*> functions is a bit mask +The C<flags> parameter in all the I<call_*> functions is a bit mask which can consist of any combination of the symbols defined below, OR'ed together. @@ -146,7 +146,7 @@ It ensures that nothing is actually returned from the subroutine. =back -The value returned by the I<perl_call_*> function indicates how many +The value returned by the I<call_*> function indicates how many items have been returned by the Perl subroutine - in this case it will be 0. @@ -154,7 +154,7 @@ be 0. =head2 G_SCALAR Calls the Perl subroutine in a scalar context. This is the default -context flag setting for all the I<perl_call_*> functions. +context flag setting for all the I<call_*> functions. This flag has 2 effects: @@ -174,7 +174,7 @@ returned. =back -The value returned by the I<perl_call_*> function indicates how many +The value returned by the I<call_*> function indicates how many items have been returned by the Perl subroutine - in this case it will be either 0 or 1. @@ -187,7 +187,7 @@ many items the Perl subroutine returns, only the last one will be accessible from the stack - think of the case where only one value is returned as being a list with only one element. Any other items that were returned will not exist by the time control returns from the -I<perl_call_*> function. 
The section I<Returning a list in a scalar +I<call_*> function. The section I<Returning a list in a scalar context> shows an example of this behavior. @@ -208,11 +208,11 @@ array context (if it executes I<wantarray> the result will be true). =item 2. It ensures that all items returned from the subroutine will be -accessible when control returns from the I<perl_call_*> function. +accessible when control returns from the I<call_*> function. =back -The value returned by the I<perl_call_*> function indicates how many +The value returned by the I<call_*> function indicates how many items have been returned by the Perl subroutine. If 0, then you have specified the G_DISCARD flag. @@ -225,7 +225,7 @@ Perl stack. =head2 G_DISCARD -By default, the I<perl_call_*> functions place the items returned from +By default, the I<call_*> functions place the items returned from by the Perl subroutine on the stack. If you are not interested in these items, then setting this flag will make Perl get rid of them automatically for you. Note that it is still possible to indicate a @@ -241,7 +241,7 @@ can ignore the problem and let Perl deal with it for you. =head2 G_NOARGS -Whenever a Perl subroutine is called using one of the I<perl_call_*> +Whenever a Perl subroutine is called using one of the I<call_*> functions, it is assumed by default that parameters are to be passed to the subroutine. If you are not passing any parameters to the Perl subroutine, you can save a bit of time by setting this flag. It has @@ -255,7 +255,7 @@ has been called to think that you have passed it parameters. In fact, what can happen is that the Perl subroutine you have called can access the C<@_> array from a previous Perl subroutine. This will -occur when the code that is executing the I<perl_call_*> function has +occur when the code that is executing the I<call_*> function has itself been called from another Perl subroutine. 
The code below illustrates this @@ -284,10 +284,10 @@ process will terminate immediately. If you want to trap this type of event, specify the G_EVAL flag. It will put an I<eval { }> around the subroutine call. -Whenever control returns from the I<perl_call_*> function you need to +Whenever control returns from the I<call_*> function you need to check the C<$@> variable as you would in a normal Perl script. -The value returned from the I<perl_call_*> function is dependent on +The value returned from the I<call_*> function is dependent on what other flags have been specified and whether an error has occurred. Here are all the different cases that can occur: @@ -295,7 +295,7 @@ occurred. Here are all the different cases that can occur: =item * -If the I<perl_call_*> function returns normally, then the value +If the I<call_*> function returns normally, then the value returned is as specified in the previous sections. =item * @@ -338,7 +338,7 @@ such situations, you will not want to clear C<$@> at all, but simply to append any new errors to any existing value of C<$@>. The G_KEEPERR flag is meant to be used in conjunction with G_EVAL in -I<perl_call_*> functions that are used to implement such code. This flag +I<call_*> functions that are used to implement such code. This flag has no effect when G_EVAL is not used. When G_KEEPERR is used, any errors in the called code will be prefixed @@ -365,7 +365,7 @@ section I<Using GIMME_V>. =head1 KNOWN PROBLEMS This section outlines all known problems that exist in the -I<perl_call_*> functions. +I<call_*> functions. =over 5 @@ -378,12 +378,12 @@ flags will not work as described in the section I<FLAG VALUES>. Specifically, if the two flags are used when calling a subroutine and that subroutine does not call I<die>, the value returned by -I<perl_call_*> will be wrong. +I<call_*> will be wrong. =item 2. 
-In Perl 5.000 and 5.001 there is a problem with using I<perl_call_*> if +In Perl 5.000 and 5.001 there is a problem with using I<call_*> if the Perl sub you are calling attempts to trap a I<die>. The symptom of this problem is that the called Perl sub will continue @@ -405,7 +405,7 @@ via this XSUB Call_fred() CODE: PUSHMARK(SP) ; - perl_call_pv("fred", G_DISCARD|G_NOARGS) ; + call_pv("fred", G_DISCARD|G_NOARGS) ; fprintf(stderr, "back in Call_fred\n") ; When C<Call_fred> is executed it will print @@ -416,13 +416,13 @@ As control never returns to C<Call_fred>, the C<"back in Call_fred"> string will not get printed. To work around this problem, you can either upgrade to Perl 5.002 or -higher, or use the G_EVAL flag with I<perl_call_*> as shown below +higher, or use the G_EVAL flag with I<call_*> as shown below void Call_fred() CODE: PUSHMARK(SP) ; - perl_call_pv("fred", G_EVAL|G_DISCARD|G_NOARGS) ; + call_pv("fred", G_EVAL|G_DISCARD|G_NOARGS) ; fprintf(stderr, "back in Call_fred\n") ; =back @@ -439,11 +439,11 @@ to Perl internals. We hope this should make the code less vulnerable to any changes made to Perl in the future. Another point worth noting is that in the first series of examples I -have made use of only the I<perl_call_pv> function. This has been done +have made use of only the I<call_pv> function. This has been done to keep the code simpler and ease you into the topic. Wherever -possible, if the choice is between using I<perl_call_pv> and -I<perl_call_sv>, you should always try to use I<perl_call_sv>. See -I<Using perl_call_sv> for details. +possible, if the choice is between using I<call_pv> and +I<call_sv>, you should always try to use I<call_sv>. See +I<Using call_sv> for details. =head2 No Parameters, Nothing returned @@ -463,7 +463,7 @@ and here is a C function to call it dSP ; PUSHMARK(SP) ; - perl_call_pv("PrintUID", G_DISCARD|G_NOARGS) ; + call_pv("PrintUID", G_DISCARD|G_NOARGS) ; } Simple, eh. @@ -487,27 +487,27 @@ specified. 
We aren't interested in anything returned from I<PrintUID>, so G_DISCARD is specified. Even if I<PrintUID> was changed to return some value(s), having specified G_DISCARD will mean that they -will be wiped by the time control returns from I<perl_call_pv>. +will be wiped by the time control returns from I<call_pv>. =item 4. -As I<perl_call_pv> is being used, the Perl subroutine is specified as a +As I<call_pv> is being used, the Perl subroutine is specified as a C string. In this case the subroutine name has been 'hard-wired' into the code. =item 5. Because we specified G_DISCARD, it is not necessary to check the value -returned from I<perl_call_pv>. It will always be 0. +returned from I<call_pv>. It will always be 0. =back =head2 Passing Parameters Now let's make a slightly more complex example. This time we want to -call a Perl subroutine, C<LeftString>, which will take 2 parameters - a -string (C<$s>) and an integer (C<$n>). The subroutine will simply -print the first C<$n> characters of the string. +call a Perl subroutine, C<LeftString>, which will take 2 parameters--a +string ($s) and an integer ($n). The subroutine will simply +print the first $n characters of the string. So the Perl subroutine would look like this @@ -534,7 +534,7 @@ The C function required to call I<LeftString> would look like this. XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - perl_call_pv("LeftString", G_DISCARD); + call_pv("LeftString", G_DISCARD); FREETMPS ; LEAVE ; @@ -555,7 +555,7 @@ as C<SP>. =item 2. If you are going to put something onto the Perl stack, you need to know -where to put it. This is the purpose of the macro C<dSP> - it declares +where to put it. This is the purpose of the macro C<dSP>--it declares and initializes a I<local> copy of the Perl stack pointer. All the other macros which will be used in this example require you to @@ -563,7 +563,7 @@ have used this macro. The exception to this rule is if you are calling a Perl subroutine directly from an XSUB function. 
In this case it is not necessary to -use the C<dSP> macro explicitly - it will be declared for you +use the C<dSP> macro explicitly--it will be declared for you automatically. =item 3. @@ -578,12 +578,12 @@ The C<PUSHMARK> macro tells Perl to make a mental note of the current stack pointer. Even if you aren't passing any parameters (like the example shown in the section I<No Parameters, Nothing returned>) you must still call the C<PUSHMARK> macro before you can call any of the -I<perl_call_*> functions - Perl still needs to know that there are no +I<call_*> functions--Perl still needs to know that there are no parameters. The C<PUTBACK> macro sets the global copy of the stack pointer to be -the same as our local copy. If we didn't do this I<perl_call_pv> -wouldn't know where the two parameters we pushed were - remember that +the same as our local copy. If we didn't do this I<call_pv> +wouldn't know where the two parameters we pushed were--remember that up to now all the stack pointer manipulation we have done is with our local copy, I<not> the global copy. @@ -634,7 +634,7 @@ an alternative to using these macros. =item 7. -Finally, I<LeftString> can now be called via the I<perl_call_pv> +Finally, I<LeftString> can now be called via the I<call_pv> function. =back @@ -672,7 +672,7 @@ function required to call it is now a bit more complex. XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - count = perl_call_pv("Adder", G_SCALAR); + count = call_pv("Adder", G_SCALAR); SPAGAIN ; @@ -694,23 +694,23 @@ Points to note this time are The only flag specified this time was G_SCALAR. That means the C<@_> array will be created and that the value returned by I<Adder> will -still exist after the call to I<perl_call_pv>. +still exist after the call to I<call_pv>. =item 2. The purpose of the macro C<SPAGAIN> is to refresh the local copy of the stack pointer. 
This is necessary because it is possible that the memory allocated to the Perl stack has been reallocated whilst in the -I<perl_call_pv> call. +I<call_pv> call. If you are making use of the Perl stack pointer in your code you must always refresh the local copy using SPAGAIN whenever you make use -of the I<perl_call_*> functions or any other Perl internal function. +of the I<call_*> functions or any other Perl internal function. =item 3. Although only a single value was expected to be returned from I<Adder>, -it is still good practice to check the return code from I<perl_call_pv> +it is still good practice to check the return code from I<call_pv> anyway. Expecting a single value is not quite the same as knowing that there @@ -776,7 +776,7 @@ and this is the C function XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - count = perl_call_pv("AddSubtract", G_ARRAY); + count = call_pv("AddSubtract", G_ARRAY); SPAGAIN ; @@ -839,7 +839,7 @@ context, like this XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - count = perl_call_pv("AddSubtract", G_SCALAR); + count = call_pv("AddSubtract", G_SCALAR); SPAGAIN ; @@ -907,7 +907,7 @@ and here is a C function to call it. XPUSHs(svb); PUTBACK ; - count = perl_call_pv("Inc", G_DISCARD); + count = call_pv("Inc", G_DISCARD); if (count != 0) croak ("call_Inc: expected 0 values from 'Inc', got %d\n", @@ -921,12 +921,12 @@ and here is a C function to call it. } To be able to access the two parameters that were pushed onto the stack -after they return from I<perl_call_pv> it is necessary to make a note -of their addresses - thus the two variables C<sva> and C<svb>. +after they return from I<call_pv> it is necessary to make a note +of their addresses--thus the two variables C<sva> and C<svb>. The reason this is necessary is that the area of the Perl stack which held them will very likely have been overwritten by something else by -the time control returns from I<perl_call_pv>. +the time control returns from I<call_pv>. 
@@ -964,7 +964,7 @@ and some C to call it XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - count = perl_call_pv("Subtract", G_EVAL|G_SCALAR); + count = call_pv("Subtract", G_EVAL|G_SCALAR); SPAGAIN ; @@ -1031,7 +1031,7 @@ refers to the C equivalent of C<$@>. Note that the stack is popped using C<POPs> in the block where C<SvTRUE(ERRSV)> is true. This is necessary because whenever a -I<perl_call_*> function invoked with G_EVAL|G_SCALAR returns an error, +I<call_*> function invoked with G_EVAL|G_SCALAR returns an error, the top of the stack holds the value I<undef>. Because we want the program to continue after detecting this error, it is essential that the stack is tidied up by removing the I<undef>. @@ -1061,18 +1061,18 @@ version of the call_Subtract example above inside a destructor: This example will fail to recognize that an error occurred inside the C<eval {}>. Here's why: the call_Subtract code got executed while perl was cleaning up temporaries when exiting the eval block, and because -call_Subtract is implemented with I<perl_call_pv> using the G_EVAL +call_Subtract is implemented with I<call_pv> using the G_EVAL flag, it promptly reset C<$@>. This results in the failure of the outermost test for C<$@>, and thereby the failure of the error trap. -Appending the G_KEEPERR flag, so that the I<perl_call_pv> call in +Appending the G_KEEPERR flag, so that the I<call_pv> call in call_Subtract reads: - count = perl_call_pv("Subtract", G_EVAL|G_SCALAR|G_KEEPERR); + count = call_pv("Subtract", G_EVAL|G_SCALAR|G_KEEPERR); will preserve the error and restore reliable error handling. -=head2 Using perl_call_sv +=head2 Using call_sv In all the previous examples I have 'hard-wired' the name of the Perl subroutine to be called from C. Most of the time though, it is more @@ -1095,23 +1095,23 @@ Here is a snippet of XSUB which defines I<CallSubPV>. 
char * name CODE: PUSHMARK(SP) ; - perl_call_pv(name, G_DISCARD|G_NOARGS) ; + call_pv(name, G_DISCARD|G_NOARGS) ; That is fine as far as it goes. The thing is, the Perl subroutine can be specified as only a string. For Perl 4 this was adequate, but Perl 5 allows references to subroutines and anonymous subroutines. -This is where I<perl_call_sv> is useful. +This is where I<call_sv> is useful. The code below for I<CallSubSV> is identical to I<CallSubPV> except that the C<name> parameter is now defined as an SV* and we use -I<perl_call_sv> instead of I<perl_call_pv>. +I<call_sv> instead of I<call_pv>. void CallSubSV(name) SV * name CODE: PUSHMARK(SP) ; - perl_call_sv(name, G_DISCARD|G_NOARGS) ; + call_sv(name, G_DISCARD|G_NOARGS) ; Because we are using an SV to call I<fred> the following can all be used @@ -1121,7 +1121,7 @@ Because we are using an SV to call I<fred> the following can all be used CallSubSV($ref) ; CallSubSV( sub { print "Hello there\n" } ) ; -As you can see, I<perl_call_sv> gives you much greater flexibility in +As you can see, I<call_sv> gives you much greater flexibility in how you can specify the Perl subroutine. You should note that if it is necessary to store the SV (C<name> in the @@ -1141,7 +1141,7 @@ pointer to the SV. Say the code above had been like this CallSavedSub1() CODE: PUSHMARK(SP) ; - perl_call_sv(rememberSub, G_DISCARD|G_NOARGS) ; + call_sv(rememberSub, G_DISCARD|G_NOARGS) ; The reason this is wrong is that by the time you come to use the pointer C<rememberSub> in C<CallSavedSub1>, it may or may not still refer @@ -1175,11 +1175,11 @@ the version of Perl you are using) Not a CODE reference at ... Undefined subroutine &main::47 called ... -The variable C<$ref> may have referred to the subroutine C<fred> +The variable $ref may have referred to the subroutine C<fred> whenever the call to C<SaveSub1> was made but by the time C<CallSavedSub1> gets called it now holds the number C<47>. 
Because we saved only a pointer to the original SV in C<SaveSub1>, any changes to -C<$ref> will be tracked by the pointer C<rememberSub>. This means that +$ref will be tracked by the pointer C<rememberSub>. This means that whenever C<CallSavedSub1> gets called, it will attempt to execute the code which is referenced by the SV* C<rememberSub>. In this case though, it now refers to the integer C<47>, so expect Perl to complain @@ -1217,7 +1217,7 @@ SV. The code below shows C<SaveSub2> modified to do that CallSavedSub2() CODE: PUSHMARK(SP) ; - perl_call_sv(keepSub, G_DISCARD|G_NOARGS) ; + call_sv(keepSub, G_DISCARD|G_NOARGS) ; To avoid creating a new SV every time C<SaveSub2> is called, the function first checks to see if it has been called before. If not, @@ -1227,7 +1227,7 @@ operation using C<newSVsv>. Thereafter, whenever C<SaveSub2> is called the existing SV, C<keepSub>, is overwritten with the new value using C<SvSetSV>. -=head2 Using perl_call_argv +=head2 Using call_argv Here is a Perl subroutine which prints whatever parameters are passed to it. @@ -1239,7 +1239,7 @@ to it. foreach (@list) { print "$_\n" } } -and here is an example of I<perl_call_argv> which will call +and here is an example of I<call_argv> which will call I<PrintList>. static char * words[] = {"alpha", "beta", "gamma", "delta", NULL} ; @@ -1249,13 +1249,13 @@ I<PrintList>. { dSP ; - perl_call_argv("PrintList", G_DISCARD, words) ; + call_argv("PrintList", G_DISCARD, words) ; } Note that it is not necessary to call C<PUSHMARK> in this instance. -This is because I<perl_call_argv> will do it for you. +This is because I<call_argv> will do it for you. -=head2 Using perl_call_method +=head2 Using call_method Consider the following Perl code @@ -1330,7 +1330,7 @@ the C<PrintID> and C<Display> methods from C. 
XPUSHs(sv_2mortal(newSViv(index))) ; PUTBACK; - perl_call_method(method, G_DISCARD) ; + call_method(method, G_DISCARD) ; void call_PrintID(class, method) @@ -1341,7 +1341,7 @@ the C<PrintID> and C<Display> methods from C. XPUSHs(sv_2mortal(newSVpv(class, 0))) ; PUTBACK; - perl_call_method(method, G_DISCARD) ; + call_method(method, G_DISCARD) ; So the methods C<PrintID> and C<Display> can be invoked like this @@ -1351,8 +1351,8 @@ So the methods C<PrintID> and C<Display> can be invoked like this call_PrintID('Mine', 'PrintID') ; The only thing to note is that in both the static and virtual methods, -the method name is not passed via the stack - it is used as the first -parameter to I<perl_call_method>. +the method name is not passed via the stack--it is used as the first +parameter to I<call_method>. =head2 Using GIMME_V @@ -1385,14 +1385,14 @@ The output from that will be =head2 Using Perl to dispose of temporaries In the examples given to date, any temporaries created in the callback -(i.e., parameters passed on the stack to the I<perl_call_*> function or +(i.e., parameters passed on the stack to the I<call_*> function or values returned via the stack) have been freed by one of these methods =over 5 =item * -specifying the G_DISCARD flag with I<perl_call_*>. +specifying the G_DISCARD flag with I<call_*>. =item * @@ -1440,11 +1440,11 @@ situation ... error occurs ... - external library --> perl_call --> perl + external library --> call_* --> perl | - perl <-- XSUB <-- external library <-- perl_call <----+ + perl <-- XSUB <-- external library <-- call_* <----+ -After processing of the error using I<perl_call_*> is completed, +After processing of the error using I<call_*> is completed, control reverts back to Perl more or less immediately. In the diagram, the further right you go the more deeply nested the @@ -1457,22 +1457,22 @@ will be more like this perl --> XSUB --> event handler ... 
- event handler --> perl_call --> perl + event handler --> call_* --> perl | - event handler <-- perl_call <----+ + event handler <-- call_* <----+ ... - event handler --> perl_call --> perl + event handler --> call_* --> perl | - event handler <-- perl_call <----+ + event handler <-- call_* <----+ ... - event handler --> perl_call --> perl + event handler --> call_* --> perl | - event handler <-- perl_call <----+ + event handler <-- call_* <----+ In this case the flow of control can consist of only the repeated sequence - event handler --> perl_call --> perl + event handler --> call_* --> perl for practically the complete duration of the program. This means that control may I<never> drop back to the surrounding scope in Perl at the @@ -1485,9 +1485,9 @@ enclosing scope at some stage. In the event driven scenario that may never happen. This means that as time goes on, your program will create more and more temporaries, none of which will ever be freed. As each of these temporaries consumes some memory your program will -eventually consume all the available memory in your system - kapow! +eventually consume all the available memory in your system--kapow! -So here is the bottom line - if you are sure that control will revert +So here is the bottom line--if you are sure that control will revert back to the enclosing Perl scope fairly quickly after the end of your callback, then it isn't absolutely necessary to dispose explicitly of any temporaries you may have created. Mind you, if you are at all @@ -1532,7 +1532,7 @@ Now change that to call a Perl subroutine instead PUSHMARK(SP) ; /* Call the Perl sub to process the callback */ - perl_call_sv(callback, G_DISCARD) ; + call_sv(callback, G_DISCARD) ; } @@ -1579,7 +1579,7 @@ require is a means of storing the mapping between the opened file and the Perl subroutine we want to be called for that file. 
Say the i/o library has a function C<asynch_read> which associates a C -function C<ProcessRead> with a file handle C<fh> - this assumes that it +function C<ProcessRead> with a file handle C<fh>--this assumes that it has also provided some routine to open the file and so obtain the file handle. @@ -1638,7 +1638,7 @@ and C<asynch_read_if> could look like this PUTBACK ; /* Call the Perl sub */ - perl_call_sv(*sv, G_DISCARD) ; + call_sv(*sv, G_DISCARD) ; } For completeness, here is C<asynch_close>. This shows how to remove @@ -1721,7 +1721,7 @@ series of C functions to act as the interface to Perl, thus PUTBACK ; /* Call the Perl sub */ - perl_call_sv(Map[index].PerlSub, G_DISCARD) ; + call_sv(Map[index].PerlSub, G_DISCARD) ; } static void @@ -1875,7 +1875,7 @@ of values> recoded to use C<ST> instead of C<POP*>. XPUSHs(sv_2mortal(newSViv(b))); PUTBACK ; - count = perl_call_pv("AddSubtract", G_ARRAY); + count = call_pv("AddSubtract", G_ARRAY); SPAGAIN ; SP -= count ; @@ -1924,22 +1924,22 @@ refers to the last. =head2 Creating and calling an anonymous subroutine in C -As we've already shown, C<perl_call_sv> can be used to invoke an +As we've already shown, C<call_sv> can be used to invoke an anonymous subroutine. However, our example showed a Perl script invoking an XSUB to perform this operation. Let's see how it can be done inside our C code: ... - SV *cvrv = perl_eval_pv("sub { print 'You will not find me cluttering any namespace!' }", TRUE); + SV *cvrv = eval_pv("sub { print 'You will not find me cluttering any namespace!' }", TRUE); ... - perl_call_sv(cvrv, G_VOID|G_NOARGS); + call_sv(cvrv, G_VOID|G_NOARGS); -C<perl_eval_pv> is used to compile the anonymous subroutine, which -will be the return value as well (read more about C<perl_eval_pv> in -L<perlguts/perl_eval_pv>). Once this code reference is in hand, it +C<eval_pv> is used to compile the anonymous subroutine, which +will be the return value as well (read more about C<eval_pv> in +L<perlapi/eval_pv>). 
Once this code reference is in hand, it can be mixed in with all the previous examples we've shown. =head1 SEE ALSO @@ -1948,7 +1948,7 @@ L<perlxs>, L<perlguts>, L<perlembed> =head1 AUTHOR -Paul Marquess <F<pmarquess@bfsec.bt.co.uk>> +Paul Marquess Special thanks to the following people who assisted in the creation of the document. diff --git a/contrib/perl5/pod/perlcompile.pod b/contrib/perl5/pod/perlcompile.pod new file mode 100644 index 0000000..697cb80 --- /dev/null +++ b/contrib/perl5/pod/perlcompile.pod @@ -0,0 +1,444 @@ +=head1 NAME + +perlcompile - Introduction to the Perl Compiler-Translator + +=head1 DESCRIPTION + +Perl has always had a compiler: your source is compiled into an +internal form (a parse tree) which is then optimized before being +run. Since version 5.005, Perl has shipped with a module +capable of inspecting the optimized parse tree (C<B>), and this has +been used to write many useful utilities, including a module that lets +you turn your Perl into C source code that can be compiled into a +native executable. + +The C<B> module provides access to the parse tree, and other modules +("back ends") do things with the tree. Some write it out as +bytecode, C source code, or a semi-human-readable text. Another +traverses the parse tree to build a cross-reference of which +subroutines, formats, and variables are used where. Another checks +your code for dubious constructs. Yet another back end dumps the +parse tree back out as Perl source, acting as a source code beautifier +or deobfuscator. + +Because its original purpose was to be a way to produce C code +corresponding to a Perl program, and in turn a native executable, the +C<B> module and its associated back ends are known as "the +compiler", even though they don't really compile anything. +Different parts of the compiler are more accurately a "translator", +or an "inspector", but people want Perl to have a "compiler +option" not an "inspector gadget". What can you do?
+ +This document covers the use of the Perl compiler: which modules +it comprises, how to use the most important of the back end modules, +what problems there are, and how to work around them. + +=head2 Layout + +The compiler back ends are in the C<B::> hierarchy, and the front-end +(the module that you, the user of the compiler, will sometimes +interact with) is the O module. Some back ends (e.g., C<B::C>) have +programs (e.g., I<perlcc>) to hide the modules' complexity. + +Here are the important back ends to know about, with their status +expressed as a number from 0 (outline for later implementation) to +10 (if there's a bug in it, we're very surprised): + +=over 4 + +=item B::Bytecode + +Stores the parse tree in a machine-independent format, suitable +for later reloading through the ByteLoader module. Status: 5 (some +things work, some things don't, some things are untested). + +=item B::C + +Creates a C source file containing code to rebuild the parse tree +and resume the interpreter. Status: 6 (many things work adequately, +including programs using Tk). + +=item B::CC + +Creates a C source file corresponding to the run time code path in +the parse tree. This is the closest to a Perl-to-C translator there +is, but the code it generates is almost incomprehensible because it +translates the parse tree into a giant switch structure that +manipulates Perl structures. Eventual goal is to reduce (given +sufficient type information in the Perl program) some of the +Perl data structure manipulations into manipulations of C-level +ints, floats, etc. Status: 5 (some things work, including +uncomplicated Tk examples). + +=item B::Lint + +Complains if it finds dubious constructs in your source code. Status: +6 (it works adequately, but only has a very limited number of areas +that it checks). + +=item B::Deparse + +Recreates the Perl source, making an attempt to format it coherently. +Status: 8 (it works nicely, but a few obscure things are missing). 
+ +=item B::Xref + +Reports on the declaration and use of subroutines and variables. +Status: 8 (it works nicely, but still has a few lingering bugs). + +=back + +=head1 Using The Back Ends + +The following sections describe how to use the various compiler back +ends. They're presented roughly in order of maturity, so that the +most stable and proven back ends are described first, and the most +experimental and incomplete back ends are described last. + +The O module automatically enables the B<-c> flag to Perl, which +prevents Perl from executing your code once it has been compiled. +This is why all the back ends print: + + myperlprogram syntax OK + +before producing any other output. + +=head2 The Cross Referencing Back End + +The cross referencing back end (B::Xref) produces a report on your program, +breaking down declarations and uses of subroutines and variables (and +formats) by file and subroutine. For instance, here's part of the +report from the I<pod2man> program that comes with Perl: + + Subroutine clear_noremap + Package (lexical) + $ready_to_print i1069, 1079 + Package main + $& 1086 + $. 1086 + $0 1086 + $1 1087 + $2 1085, 1085 + $3 1085, 1085 + $ARGV 1086 + %HTML_Escapes 1085, 1085 + +This shows the variables used in the subroutine C<clear_noremap>. The +variable C<$ready_to_print> is a my() (lexical) variable, +B<i>ntroduced (first declared with my()) on line 1069, and used on +line 1079. The variable C<$&> from the main package is used on 1086, +and so on. + +A line number may be prefixed by a single letter: + +=over 4 + +=item i + +Lexical variable introduced (declared with my()) for the first time. + +=item & + +Subroutine or method call. + +=item s + +Subroutine defined. + +=item r + +Format defined. + +=back + +The most useful option the cross referencer has is to save the report +to a separate file.
For instance, to save the report on +I<myperlprogram> to the file I<report>: + + $ perl -MO=Xref,-oreport myperlprogram + +=head2 The Decompiling Back End + +The Deparse back end turns your Perl source back into Perl source. It +can reformat along the way, making it useful as a de-obfuscator. The +most basic way to use it is: + + $ perl -MO=Deparse myperlprogram + +You'll notice immediately that Perl has no idea of how to paragraph +your code. You'll have to separate chunks of code from each other +with newlines by hand. However, watch what it will do with +one-liners: + + $ perl -MO=Deparse -e '$op=shift||die "usage: $0 + code [...]";chomp(@ARGV=<>)unless@ARGV; for(@ARGV){$was=$_;eval$op; + die$@ if$@; rename$was,$_ unless$was eq $_}' + -e syntax OK + $op = shift @ARGV || die("usage: $0 code [...]"); + chomp(@ARGV = <ARGV>) unless @ARGV; + foreach $_ (@ARGV) { + $was = $_; + eval $op; + die $@ if $@; + rename $was, $_ unless $was eq $_; + } + +(this is the I<rename> program that comes in the I<eg/> directory +of the Perl source distribution). + +The decompiler has several options for the code it generates. For +instance, you can set the size of each indent from 4 (as above) to +2 with: + + $ perl -MO=Deparse,-si2 myperlprogram + +The B<-p> option adds parentheses where normally they are omitted: + + $ perl -MO=Deparse -e 'print "Hello, world\n"' + -e syntax OK + print "Hello, world\n"; + $ perl -MO=Deparse,-p -e 'print "Hello, world\n"' + -e syntax OK + print("Hello, world\n"); + +See L<B::Deparse> for more information on the formatting options. + +=head2 The Lint Back End + +The lint back end (B::Lint) inspects programs for poor style. One +programmer's bad style is another programmer's useful tool, so options +let you select what is complained about. 
+ +To run the style checker across your source code: + + $ perl -MO=Lint myperlprogram + +To disable context checks and undefined subroutines: + + $ perl -MO=Lint,-context,-undefined-subs myperlprogram + +See L<B::Lint> for information on the options. + +=head2 The Simple C Back End + +This module saves the internal compiled state of your Perl program +to a C source file, which can be turned into a native executable +for that particular platform using a C compiler. The resulting +program links against the Perl interpreter library, so it +will not save you disk space (unless you build Perl with a shared +library) or program size. It may, however, save you startup time. + +The C<perlcc> tool generates such executables by default. + + perlcc myperlprogram.pl + +=head2 The Bytecode Back End + +This back end is only useful if you also have a way to load and +execute the bytecode that it produces. The ByteLoader module provides +this functionality. + +To turn a Perl program into executable byte code, you can use C<perlcc> +with the C<-b> switch: + + perlcc -b myperlprogram.pl + +The byte code is machine independent, so once you have a compiled +module or program, it is as portable as Perl source (assuming that +the user of the module or program has a modern-enough Perl interpreter +to decode the byte code). + +See B<B::Bytecode> for information on options to control the +optimization and nature of the code generated by the Bytecode module. + +=head2 The Optimized C Back End + +The optimized C back end will turn your Perl program's run time +code-path into an equivalent (but optimized) C program that manipulates +the Perl data structures directly. The program will still link against +the Perl interpreter library, to allow for eval(), C<s///e>, +C<require>, etc. + +The C<perlcc> tool generates such executables when using the -opt +switch. 
To compile a Perl program (ending in C<.pl> +or C<.p>): + + perlcc -opt myperlprogram.pl + +To produce a shared library from a Perl module (ending in C<.pm>): + + perlcc -opt Myperlmodule.pm + +For more information, see L<perlcc> and L<B::CC>. + +=over 4 + +=item B + +This module is the introspective ("reflective" in Java terms) +module, which allows a Perl program to inspect its innards. The +back end modules all use this module to gain access to the compiled +parse tree. You, the user of a back end module, will not need to +interact with B. + +=item O + +This module is the front-end to the compiler's back ends. Normally +called something like this: + + $ perl -MO=Deparse myperlprogram + +This is like saying C<use O 'Deparse'> in your Perl program. + +=item B::Asmdata + +This module is used by the B::Assembler module, which is in turn used +by the B::Bytecode module, which stores a parse-tree as +bytecode for later loading. It's not a back end itself, but rather a +component of a back end. + +=item B::Assembler + +This module turns a parse-tree into data suitable for storing +and later decoding back into a parse-tree. It's not a back end +itself, but rather a component of a back end. It's used by the +I<assemble> program that produces bytecode. + +=item B::Bblock + +This module is used by the B::CC back end. It walks "basic blocks". +A basic block is a series of operations which is known to execute from +start to finish, with no possibility of branching or halting. + +=item B::Bytecode + +This module is a back end that generates bytecode from a +program's parse tree. This bytecode is written to a file, from where +it can later be reconstructed back into a parse tree. The goal is to +do the expensive program compilation once, save the interpreter's +state into a file, and then restore the state from the file when the +program is to be executed. See L</"The Bytecode Back End"> +for details about usage.
+ +=item B::C + +This module writes out C code corresponding to the parse tree and +other interpreter internal structures. You compile the corresponding +C file, and get an executable file that will restore the internal +structures and the Perl interpreter will begin running the +program. See L</"The Simple C Back End"> for details about usage. + +=item B::CC + +This module writes out C code corresponding to your program's +operations. Unlike the B::C module, which merely stores the +interpreter and its state in a C program, the B::CC module makes a +C program that does not involve the interpreter. As a consequence, +programs translated into C by B::CC can execute faster than normal +interpreted programs. See L</"The Optimized C Back End"> for +details about usage. + +=item B::Debug + +This module dumps the Perl parse tree in verbose detail to STDOUT. +It's useful for people who are writing their own back end, or who +are learning about the Perl internals. It's not useful to the +average programmer. + +=item B::Deparse + +This module produces Perl source code from the compiled parse tree. +It is useful in debugging and deconstructing other people's code, +also as a pretty-printer for your own source. See +L</"The Decompiling Back End"> for details about usage. + +=item B::Disassembler + +This module turns bytecode back into a parse tree. It's not a back +end itself, but rather a component of a back end. It's used by the +I<disassemble> program that comes with the bytecode. + +=item B::Lint + +This module inspects the compiled form of your source code for things +which, while some people frown on them, aren't necessarily bad enough +to justify a warning. For instance, use of an array in scalar context +without explicitly saying C<scalar(@array)> is something that Lint +can identify. See L</"The Lint Back End"> for details about usage. + +=item B::Showlex + +This module prints out the my() variables used in a function or a +file. 
To get a list of the my() variables used in the subroutine +mysub() defined in the file myperlprogram: + + $ perl -MO=Showlex,mysub myperlprogram + +To get a list of the my() variables used in the file myperlprogram: + + $ perl -MO=Showlex myperlprogram + +[BROKEN] + +=item B::Stackobj + +This module is used by the B::CC module. It's not a back end itself, +but rather a component of a back end. + +=item B::Stash + +This module is used by the L<perlcc> program, which compiles a module +into an executable. B::Stash prints the symbol tables in use by a +program, and is used to prevent B::CC from producing C code for the +B::* and O modules. It's not a back end itself, but rather a +component of a back end. + +=item B::Terse + +This module prints the contents of the parse tree, but without as much +information as B::Debug. For comparison, C<print "Hello, world."> +produced 96 lines of output from B::Debug, but only 6 from B::Terse. + +This module is useful for people who are writing their own back end, +or who are learning about the Perl internals. It's not useful to the +average programmer. + +=item B::Xref + +This module prints a report on where the variables, subroutines, and +formats are defined and used within a program and the modules it +loads. See L</"The Cross Referencing Back End"> for details about +usage. + +=back + +=head1 KNOWN PROBLEMS + +The simple C backend currently only saves typeglobs with alphanumeric +names. + +The optimized C backend outputs code for more modules than it should +(e.g., DirHandle). It also has little hope of properly handling +C<goto LABEL> outside the running subroutine (C<goto &sub> is ok). +C<goto LABEL> currently does not work at all in this backend. +It also creates a huge initialization function that gives +C compilers headaches. Splitting the initialization function gives +better results. Other problems include: unsigned math does not +work correctly; some opcodes are handled incorrectly by the default +opcode handling mechanism.
+ +BEGIN{} blocks are executed while compiling your code. Any external +state that is initialized in BEGIN{}, such as opening files, initiating +database connections etc., do not behave properly. To work around +this, Perl has an INIT{} block that corresponds to code being executed +before your program begins running but after your program has finished +being compiled. Execution order: BEGIN{}, (possible save of state +through compiler back-end), INIT{}, program runs, END{}. + +=head1 AUTHOR + +This document was originally written by Nathan Torkington, and is now +maintained by the perl5-porters mailing list +I<perl5-porters@perl.org>. + +=cut diff --git a/contrib/perl5/pod/perldata.pod b/contrib/perl5/pod/perldata.pod index 9e41c2c..ac444fa 100644 --- a/contrib/perl5/pod/perldata.pod +++ b/contrib/perl5/pod/perldata.pod @@ -6,78 +6,84 @@ perldata - Perl data types =head2 Variable names -Perl has three data structures: scalars, arrays of scalars, and -associative arrays of scalars, known as "hashes". Normal arrays are -indexed by number, starting with 0. (Negative subscripts count from -the end.) Hash arrays are indexed by string. +Perl has three built-in data types: scalars, arrays of scalars, and +associative arrays of scalars, known as "hashes". Normal arrays +are ordered lists of scalars indexed by number, starting with 0 and with +negative subscripts counting from the end. Hashes are unordered +collections of scalar values indexed by their associated string key. -Values are usually referred to by name (or through a named reference). +Values are usually referred to by name, or through a named reference. The first character of the name tells you to what sort of data structure it refers. The rest of the name tells you the particular -value to which it refers. Most often, it consists of a single -I<identifier>, that is, a string beginning with a letter or underscore, -and containing letters, underscores, and digits. 
In some cases, it -may be a chain of identifiers, separated by C<::> (or by C<'>, but -that's deprecated); all but the last are interpreted as names of -packages, to locate the namespace in which to look -up the final identifier (see L<perlmod/Packages> for details). -It's possible to substitute for a simple identifier an expression -that produces a reference to the value at runtime; this is -described in more detail below, and in L<perlref>. - -There are also special variables whose names don't follow these -rules, so that they don't accidentally collide with one of your -normal variables. Strings that match parenthesized parts of a -regular expression are saved under names containing only digits after -the C<$> (see L<perlop> and L<perlre>). In addition, several special -variables that provide windows into the inner working of Perl have names -containing punctuation characters (see L<perlvar>). - -Scalar values are always named with '$', even when referring to a scalar -that is part of an array. It works like the English word "the". Thus -we have: +value to which it refers. Usually this name is a single I<identifier>, +that is, a string beginning with a letter or underscore, and +containing letters, underscores, and digits. In some cases, it may +be a chain of identifiers, separated by C<::> (or by the slightly +archaic C<'>); all but the last are interpreted as names of packages, +to locate the namespace in which to look up the final identifier +(see L<perlmod/Packages> for details). It's possible to substitute +for a simple identifier, an expression that produces a reference +to the value at runtime. This is described in more detail below +and in L<perlref>. + +Perl also has its own built-in variables whose names don't follow +these rules. They have strange names so they don't accidentally +collide with one of your normal variables. 
Strings that match +parenthesized parts of a regular expression are saved under names +containing only digits after the C<$> (see L<perlop> and L<perlre>). +In addition, several special variables that provide windows into +the inner working of Perl have names containing punctuation characters +and control characters. These are documented in L<perlvar>. + +Scalar values are always named with '$', even when referring to a +scalar that is part of an array or a hash. The '$' symbol works +semantically like the English word "the" in that it indicates a +single value is expected. $days # the simple scalar value "days" $days[28] # the 29th element of array @days $days{'Feb'} # the 'Feb' value from hash %days $#days # the last index of array @days -but entire arrays or array slices are denoted by '@', which works much like -the word "these" or "those": +Entire arrays (and slices of arrays and hashes) are denoted by '@', +which works much like the word "these" or "those" does in English, +in that it indicates multiple values are expected. @days # ($days[0], $days[1],... $days[n]) - @days[3,4,5] # same as @days[3..5] + @days[3,4,5] # same as ($days[3],$days[4],$days[5]) @days{'a','c'} # same as ($days{'a'},$days{'c'}) -and entire hashes are denoted by '%': +Entire hashes are denoted by '%': %days # (key1, val1, key2, val2 ...) -In addition, subroutines are named with an initial '&', though this is -optional when it's otherwise unambiguous (just as "do" is often -redundant in English). Symbol table entries can be named with an -initial '*', but you don't really care about that yet. - -Every variable type has its own namespace. You can, without fear of -conflict, use the same name for a scalar variable, an array, or a hash -(or, for that matter, a filehandle, a subroutine name, or a label). -This means that $foo and @foo are two different variables. It also -means that C<$foo[1]> is a part of @foo, not a part of $foo. 
This may -seem a bit weird, but that's okay, because it is weird. - -Because variable and array references always start with '$', '@', or '%', -the "reserved" words aren't in fact reserved with respect to variable -names. (They ARE reserved with respect to labels and filehandles, -however, which don't have an initial special character. You can't have -a filehandle named "log", for instance. Hint: you could say -C<open(LOG,'logfile')> rather than C<open(log,'logfile')>. Using uppercase -filehandles also improves readability and protects you from conflict -with future reserved words.) Case I<IS> significant--"FOO", "Foo", and -"foo" are all different names. Names that start with a letter or -underscore may also contain digits and underscores. +In addition, subroutines are named with an initial '&', though this +is optional when unambiguous, just as the word "do" is often redundant +in English. Symbol table entries can be named with an initial '*', +but you don't really care about that yet (if ever :-). + +Every variable type has its own namespace, as do several +non-variable identifiers. This means that you can, without fear +of conflict, use the same name for a scalar variable, an array, or +a hash--or, for that matter, for a filehandle, a directory handle, a +subroutine name, a format name, or a label. This means that $foo +and @foo are two different variables. It also means that C<$foo[1]> +is a part of @foo, not a part of $foo. This may seem a bit weird, +but that's okay, because it is weird. + +Because variable references always start with '$', '@', or '%', the +"reserved" words aren't in fact reserved with respect to variable +names. They I<are> reserved with respect to labels and filehandles, +however, which don't have an initial special character. You can't +have a filehandle named "log", for instance. Hint: you could say +C<open(LOG,'logfile')> rather than C<open(log,'logfile')>. 
Using +uppercase filehandles also improves readability and protects you +from conflict with future reserved words. Case I<is> significant--"FOO", +"Foo", and "foo" are all different names. Names that start with a +letter or underscore may also contain digits and underscores. It is possible to replace such an alphanumeric name with an expression -that returns a reference to an object of that type. For a description +that returns a reference to the appropriate type. For a description of this, see L<perlref>. Names that start with a digit may contain only more digits. Names @@ -90,89 +96,108 @@ current process id.) The interpretation of operations and values in Perl sometimes depends on the requirements of the context around the operation or value. -There are two major contexts: scalar and list. Certain operations +There are two major contexts: list and scalar. Certain operations return list values in contexts wanting a list, and scalar values -otherwise. (If this is true of an operation it will be mentioned in -the documentation for that operation.) In other words, Perl overloads +otherwise. If this is true of an operation it will be mentioned in +the documentation for that operation. In other words, Perl overloads certain operations based on whether the expected return value is -singular or plural. (Some words in English work this way, like "fish" -and "sheep".) +singular or plural. Some words in English work this way, like "fish" +and "sheep". In a reciprocal fashion, an operation provides either a scalar or a list context to each of its arguments. For example, if you say int( <STDIN> ) -the integer operation provides a scalar context for the E<lt>STDINE<gt> +the integer operation provides scalar context for the <> operator, which responds by reading one line from STDIN and passing it back to the integer operation, which will then find the integer value of that line and return that. 
If, on the other hand, you say sort( <STDIN> ) -then the sort operation provides a list context for E<lt>STDINE<gt>, which +then the sort operation provides list context for <>, which will proceed to read every line available up to the end of file, and pass that list of lines back to the sort routine, which will then sort those lines and return them as a list to whatever the context of the sort was. -Assignment is a little bit special in that it uses its left argument to -determine the context for the right argument. Assignment to a scalar -evaluates the righthand side in a scalar context, while assignment to -an array or array slice evaluates the righthand side in a list -context. Assignment to a list also evaluates the righthand side in a -list context. - -User defined subroutines may choose to care whether they are being -called in a scalar or list context, but most subroutines do not -need to care, because scalars are automatically interpolated into -lists. See L<perlfunc/wantarray>. +Assignment is a little bit special in that it uses its left argument +to determine the context for the right argument. Assignment to a +scalar evaluates the right-hand side in scalar context, while +assignment to an array or hash evaluates the righthand side in list +context. Assignment to a list (or slice, which is just a list +anyway) also evaluates the righthand side in list context. + +When you use the C<use warnings> pragma or Perl's B<-w> command-line +option, you may see warnings +about useless uses of constants or functions in "void context". +Void context just means the value has been discarded, such as a +statement containing only C<"fred";> or C<getpwuid(0);>. It still +counts as scalar context for functions that care whether or not +they're being called in list context. + +User-defined subroutines may choose to care whether they are being +called in a void, scalar, or list context. Most subroutines do not +need to bother, though. 
That's because both scalars and lists are +automatically interpolated into lists. See L<perlfunc/wantarray> +for how you would dynamically discern your function's calling +context. =head2 Scalar values -All data in Perl is a scalar or an array of scalars or a hash of scalars. -Scalar variables may contain various kinds of singular data, such as -numbers, strings, and references. In general, conversion from one form to -another is transparent. (A scalar may not contain multiple values, but -may contain a reference to an array or hash containing multiple values.) -Because of the automatic conversion of scalars, operations, and functions -that return scalars don't need to care (and, in fact, can't care) whether -the context is looking for a string or a number. - -Scalars aren't necessarily one thing or another. There's no place to -declare a scalar variable to be of type "string", or of type "number", or -type "filehandle", or anything else. Perl is a contextually polymorphic -language whose scalars can be strings, numbers, or references (which -includes objects). While strings and numbers are considered pretty -much the same thing for nearly all purposes, references are strongly-typed -uncastable pointers with builtin reference-counting and destructor -invocation. +All data in Perl is a scalar, an array of scalars, or a hash of +scalars. A scalar may contain one single value in any of three +different flavors: a number, a string, or a reference. In general, +conversion from one form to another is transparent. Although a +scalar may not directly hold multiple values, it may contain a +reference to an array or hash which in turn contains multiple values. + +Scalars aren't necessarily one thing or another. There's no place +to declare a scalar variable to be of type "string", type "number", +type "reference", or anything else. 
Because of the automatic +conversion of scalars, operations that return scalars don't need +to care (and in fact, cannot care) whether their caller is looking +for a string, a number, or a reference. Perl is a contextually +polymorphic language whose scalars can be strings, numbers, or +references (which includes objects). Although strings and numbers +are considered pretty much the same thing for nearly all purposes, +references are strongly-typed, uncastable pointers with builtin +reference-counting and destructor invocation. A scalar value is interpreted as TRUE in the Boolean sense if it is not the null string or the number 0 (or its string equivalent, "0"). The -Boolean context is just a special kind of scalar context. - -There are actually two varieties of null scalars: defined and -undefined. Undefined null scalars are returned when there is no real -value for something, such as when there was an error, or at end of -file, or when you refer to an uninitialized variable or element of an -array. An undefined null scalar may become defined the first time you -use it as if it were defined, but prior to that you can use the -defined() operator to determine whether the value is defined or not. - -To find out whether a given string is a valid nonzero number, it's usually -enough to test it against both numeric 0 and also lexical "0" (although -this will cause B<-w> noises). That's because strings that aren't -numbers count as 0, just as they do in B<awk>: +Boolean context is just a special kind of scalar context where no +conversion to a string or a number is ever performed. + +There are actually two varieties of null strings (sometimes referred +to as "empty" strings), a defined one and an undefined one. The +defined version is just a string of length zero, such as C<"">. 
+The undefined version is the value that indicates that there is +no real value for something, such as when there was an error, or +at end of file, or when you refer to an uninitialized variable or +element of an array or hash. Although in early versions of Perl, +an undefined scalar could become defined when first used in a +place expecting a defined value, this no longer happens except for +rare cases of autovivification as explained in L<perlref>. You can +use the defined() operator to determine whether a scalar value is +defined (this has no meaning on arrays or hashes), and the undef() +operator to produce an undefined value. + +To find out whether a given string is a valid non-zero number, it's +sometimes enough to test it against both numeric 0 and also lexical +"0" (although this will cause B<-w> noises). That's because strings +that aren't numbers count as 0, just as they do in B<awk>: if ($str == 0 && $str ne "0") { warn "That doesn't look like a number"; } -That's usually preferable because otherwise you won't treat IEEE notations -like C<NaN> or C<Infinity> properly. At other times you might prefer to -use the POSIX::strtod function or a regular expression to check whether -data is numeric. See L<perlre> for details on regular expressions. +That method may be best because otherwise you won't treat IEEE +notations like C<NaN> or C<Infinity> properly. At other times, you +might prefer to determine whether string data can be used numerically +by calling the POSIX::strtod() function or by inspecting your string +with a regular expression (as documented in L<perlre>). warn "has nondigits" if /\D/; warn "not a natural number" unless /^\d+$/; # rejects -3 @@ -183,27 +208,30 @@ data is numeric. See L<perlre> for details on regular expressions. warn "not a C float" unless /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/; -The length of an array is a scalar value. You may find the length of -array @days by evaluating C<$#days>, as in B<csh>. 
(Actually, it's not -the length of the array, it's the subscript of the last element, because -there is (ordinarily) a 0th element.) Assigning to C<$#days> changes the -length of the array. Shortening an array by this method destroys -intervening values. Lengthening an array that was previously shortened -I<NO LONGER> recovers the values that were in those elements. (It used to -in Perl 4, but we had to break this to make sure destructors were -called when expected.) You can also gain some miniscule measure of efficiency by -pre-extending an array that is going to get big. (You can also extend -an array by assigning to an element that is off the end of the array.) -You can truncate an array down to nothing by assigning the null list () -to it. The following are equivalent: +The length of an array is a scalar value. You may find the length +of array @days by evaluating C<$#days>, as in B<csh>. Technically +speaking, this isn't the length of the array; it's the subscript +of the last element, since there is ordinarily a 0th element. +Assigning to C<$#days> actually changes the length of the array. +Shortening an array this way destroys intervening values. Lengthening +an array that was previously shortened does not recover values +that were in those elements. (It used to do so in Perl 4, but we +had to break this to make sure destructors were called when expected.) + +You can also gain some minuscule measure of efficiency by pre-extending +an array that is going to get big. You can also extend an array +by assigning to an element that is off the end of the array. You +can truncate an array down to nothing by assigning the null list +() to it. The following are equivalent: @whatever = (); $#whatever = -1; -If you evaluate a named array in a scalar context, it returns the length of -the array. (Note that this is not true of lists, which return the -last value, like the C comma operator, nor of built-in functions, which return -whatever they feel like returning.)
The following is always true: +If you evaluate an array in scalar context, it returns the length +of the array. (Note that this is not true of lists, which return +the last value, like the C comma operator, nor of built-in functions, +which return whatever they feel like returning.) The following is +always true: scalar(@whatever) == $#whatever - $[ + 1; @@ -214,21 +242,21 @@ So in general you can assume that scalar(@whatever) == $#whatever + 1; -Some programmers choose to use an explicit conversion so nothing's -left to doubt: +Some programmers choose to use an explicit conversion so as to +leave nothing to doubt: $element_count = scalar(@whatever); -If you evaluate a hash in a scalar context, it returns a value that is -true if and only if the hash contains any key/value pairs. (If there -are any key/value pairs, the value returned is a string consisting of -the number of used buckets and the number of allocated buckets, separated -by a slash. This is pretty much useful only to find out whether Perl's -(compiled in) hashing algorithm is performing poorly on your data set. -For example, you stick 10,000 things in a hash, but evaluating %HASH in -scalar context reveals "1/16", which means only one out of sixteen buckets -has been touched, and presumably contains all 10,000 of your items. This -isn't supposed to happen.) +If you evaluate a hash in scalar context, it returns false if the +hash is empty. If there are any key/value pairs, it returns true; +more precisely, the value returned is a string consisting of the +number of used buckets and the number of allocated buckets, separated +by a slash. This is pretty much useful only to find out whether +Perl's internal hashing algorithm is performing poorly on your data +set. For example, you stick 10,000 things in a hash, but evaluating +%HASH in scalar context reveals C<"1/16">, which means only one out +of sixteen buckets has been touched, and presumably contains all +10,000 of your items. 
This isn't supposed to happen. You can preallocate space for a hash by assigning to the keys() function. This rounds up the allocated bucked to the next power of two: @@ -237,35 +265,36 @@ This rounds up the allocated bucked to the next power of two: =head2 Scalar value constructors -Numeric literals are specified in any of the customary floating point or +Numeric literals are specified in any of the following floating point or integer formats: 12345 12345.67 - .23E-10 - 0xffff # hex - 0377 # octal - 4_294_967_296 # underline for legibility + .23E-10 # a very small number + 4_294_967_296 # underline for legibility + 0xff # hex + 0377 # octal + 0b011011 # binary String literals are usually delimited by either single or double -quotes. They work much like shell quotes: double-quoted string -literals are subject to backslash and variable substitution; -single-quoted strings are not (except for "C<\'>" and "C<\\>"). -The usual Unix backslash rules apply for making characters such as -newline, tab, etc., as well as some more exotic forms. See -L<perlop/"Quote and Quotelike Operators"> for a list. - -Octal or hex representations in string literals (e.g. '0xffff') are not -automatically converted to their integer representation. The hex() and -oct() functions make these conversions for you. See L<perlfunc/hex> and -L<perlfunc/oct> for more details. +quotes. They work much like quotes in the standard Unix shells: +double-quoted string literals are subject to backslash and variable +substitution; single-quoted strings are not (except for C<\'> and +C<\\>). The usual C-style backslash rules apply for making +characters such as newline, tab, etc., as well as some more exotic +forms. See L<perlop/"Quote and Quote-like Operators"> for a list. + +Hexadecimal, octal, or binary representations in string literals +(e.g. '0xff') are not automatically converted to their integer +representation. The hex() and oct() functions make these conversions +for you.
See L<perlfunc/hex> and L<perlfunc/oct> for more details. You can also embed newlines directly in your strings, i.e., they can end on a different line than they begin. This is nice, but if you forget your trailing quote, the error will not be reported until Perl finds another line containing the quote character, which may be much further on in the script. Variable substitution inside strings is limited to -scalar variables, arrays, and array slices. (In other words, +scalar variables, arrays, and array or hash slices. (In other words, names beginning with $ or @, followed by an optional bracketed expression as a subscript.) The following code segment prints out "The price is $Z<>100." @@ -273,47 +302,78 @@ price is $Z<>100." $Price = '$100'; # not interpreted print "The price is $Price.\n"; # interpreted -As in some shells, you can put curly brackets around the name to -delimit it from following alphanumerics. In fact, an identifier -within such curlies is forced to be a string, as is any single -identifier within a hash subscript. Our earlier example, - - $days{'Feb'} - -can be written as - - $days{Feb} - -and the quotes will be assumed automatically. But anything more complicated -in the subscript will be interpreted as an expression. - -Note that a -single-quoted string must be separated from a preceding word by a -space, because single quote is a valid (though deprecated) character in -a variable name (see L<perlmod/Packages>). - -Three special literals are __FILE__, __LINE__, and __PACKAGE__, which +As in some shells, you can enclose the variable name in braces to +disambiguate it from following alphanumerics. 
You must also do +this when interpolating a variable into a string to separate the +variable name from a following double-colon or an apostrophe, since +these would be otherwise treated as a package separator: + + $who = "Larry"; + print PASSWD "${who}::0:0:Superuser:/:/bin/perl\n"; + print "We use ${who}speak when ${who}'s here.\n"; + +Without the braces, Perl would have looked for a $whospeak, a +C<$who::0>, and a C<$who's> variable. The last two would be the +$0 and the $s variables in the (presumably) non-existent package +C<who>. + +In fact, an identifier within such curlies is forced to be a string, +as is any simple identifier within a hash subscript. Neither need +quoting. Our earlier example, C<$days{'Feb'}> can be written as +C<$days{Feb}> and the quotes will be assumed automatically. But +anything more complicated in the subscript will be interpreted as +an expression. + +A literal of the form C<v1.20.300.4000> is parsed as a string composed +of characters with the specified ordinals. This provides an alternative, +more readable way to construct strings, rather than use the somewhat less +readable interpolation form C<"\x{1}\x{14}\x{12c}\x{fa0}">. This is useful +for representing Unicode strings, and for comparing version "numbers" +using the string comparison operators, C<cmp>, C<gt>, C<lt> etc. +If there are two or more dots in the literal, the leading C<v> may be +omitted. + + print v9786; # prints UTF-8 encoded SMILEY, "\x{263a}" + print v102.111.111; # prints "foo" + print 102.111.111; # same + +Such literals are accepted by both C<require> and C<use> for +doing a version check. The C<$^V> special variable also contains the +running Perl interpreter's version in this form. See L<perlvar/$^V>. + +The special literals __FILE__, __LINE__, and __PACKAGE__ represent the current filename, line number, and package name at that point in your program. They may be used only as separate tokens; they will not be interpolated into strings. 
If there is no current package -(due to an empty C<package;> directive), __PACKAGE__ is the undefined value. - -The tokens __END__ and __DATA__ may be used to indicate the logical end -of the script before the actual end of file. Any following text is -ignored, but may be read via a DATA filehandle: main::DATA for __END__, -or PACKNAME::DATA (where PACKNAME is the current package) for __DATA__. -The two control characters ^D and ^Z are synonyms for __END__ (or -__DATA__ in a module). See L<SelfLoader> for more description of -__DATA__, and an example of its use. Note that you cannot read from the -DATA filehandle in a BEGIN block: the BEGIN block is executed as soon as -it is seen (during compilation), at which point the corresponding +(due to an empty C<package;> directive), __PACKAGE__ is the undefined +value. + +The two control characters ^D and ^Z, and the tokens __END__ and __DATA__ +may be used to indicate the logical end of the script before the actual +end of file. Any following text is ignored. + +Text after __DATA__ may be read via the filehandle C<PACKNAME::DATA>, +where C<PACKNAME> is the package that was current when the __DATA__ +token was encountered. The filehandle is left open pointing to the +contents after __DATA__. It is the program's responsibility to +C<close DATA> when it is done reading from it. For compatibility with +older scripts written before __DATA__ was introduced, __END__ behaves +like __DATA__ in the toplevel script (but not in files loaded with +C<require> or C<do>) and leaves the remaining contents of the +file accessible via C<main::DATA>. + +See L<SelfLoader> for more description of __DATA__, and +an example of its use. Note that you cannot read from the DATA +filehandle in a BEGIN block: the BEGIN block is executed as soon +as it is seen (during compilation), at which point the corresponding __DATA__ (or __END__) token has not yet been seen.
A word that has no other interpretation in the grammar will be treated as if it were a quoted string. These are known as "barewords". As with filehandles and labels, a bareword that consists entirely of lowercase letters risks conflict with future reserved -words, and if you use the B<-w> switch, Perl will warn you about any +words, and if you use the C<use warnings> pragma or the B<-w> switch, +Perl will warn you about any such words. Some people may wish to outlaw barewords entirely. If you say @@ -324,34 +384,34 @@ produces a compile-time error instead. The restriction lasts to the end of the enclosing block. An inner block may countermand this by saying C<no strict 'subs'>. -Array variables are interpolated into double-quoted strings by joining all -the elements of the array with the delimiter specified in the C<$"> -variable (C<$LIST_SEPARATOR> in English), space by default. The following -are equivalent: +Arrays and slices are interpolated into double-quoted strings +by joining the elements with the delimiter specified in the C<$"> +variable (C<$LIST_SEPARATOR> in English), space by default. The +following are equivalent: - $temp = join($",@ARGV); + $temp = join($", @ARGV); system "echo $temp"; system "echo @ARGV"; Within search patterns (which also undergo double-quotish substitution) -there is a bad ambiguity: Is C</$foo[bar]/> to be interpreted as +there is an unfortunate ambiguity: Is C</$foo[bar]/> to be interpreted as C</${foo}[bar]/> (where C<[bar]> is a character class for the regular expression) or as C</${foo[bar]}/> (where C<[bar]> is the subscript to array @foo)? If @foo doesn't otherwise exist, then it's obviously a character class. If @foo exists, Perl takes a good guess about C<[bar]>, and is almost always right. If it does guess wrong, or if you're just plain paranoid, you can force the correct interpretation with curly -brackets as above. +braces as above. -A line-oriented form of quoting is based on the shell "here-doc" -syntax. 
Following a C<E<lt>E<lt>> you specify a string to terminate +A line-oriented form of quoting is based on the shell "here-document" +syntax. Following a C<< << >> you specify a string to terminate the quoted material, and all lines following the current line down to the terminating string are the value of the item. The terminating string may be either an identifier (a word), or some quoted text. If quoted, the type of quotes you use determines the treatment of the text, just as in regular quoting. An unquoted identifier works like -double quotes. There must be no space between the C<E<lt>E<lt>> and +double quotes. There must be no space between the C<< << >> and the identifier. (If you put a space it will be treated as a null identifier, which is valid, and matches the first empty line.) The terminating string must appear by itself (unquoted and with no @@ -392,6 +452,14 @@ try to do this: ABC + 20; +If you want your here-docs to be indented with the +rest of the code, you'll need to remove leading whitespace +from each line manually: + + ($quote = <<'FINIS') =~ s/^\s+//gm; + The Road goes ever on and on, + down from the door where it began. + FINIS =head2 List value constructors @@ -400,19 +468,19 @@ List values are denoted by separating individual values by commas (LIST) -In a context not requiring a list value, the value of the list -literal is the value of the final element, as with the C comma operator. -For example, +In a context not requiring a list value, the value of what appears +to be a list literal is simply the value of the final element, as +with the C comma operator. For example, @foo = ('cc', '-E', $bar); -assigns the entire list value to array foo, but +assigns the entire list value to array @foo, but $foo = ('cc', '-E', $bar); -assigns the value of variable bar to variable foo. 
Note that the value -of an actual array in a scalar context is the length of the array; the -following assigns the value 3 to $foo: +assigns the value of variable $bar to the scalar variable $foo. +Note that the value of an actual array in scalar context is the +length of the array; the following assigns the value 3 to $foo: @foo = ('cc', '-E', $bar); $foo = @foo; # $foo gets 3 @@ -426,8 +494,19 @@ list literal, so that you can say: 3, ); +To use a here-document to assign an array, one line per element, +you might use an approach like this: + + @sauces = <<End_Lines =~ m/(\S.*\S)/g; + normal tomato + spicy tomato + green chile + pesto + white wine + End_Lines + LISTs do automatic interpolation of sublists. That is, when a LIST is -evaluated, each element of the list is evaluated in a list context, and +evaluated, each element of the list is evaluated in list context, and the resulting list value is interpolated into LIST just as if each individual element were a member of LIST. Thus arrays and hashes lose their identity in a LIST--the list @@ -436,7 +515,7 @@ identity in a LIST--the list contains all the elements of @foo followed by all the elements of @bar, followed by all the elements returned by the subroutine named SomeSub -called in a list context, followed by the key/value pairs of %glarch. +called in list context, followed by the key/value pairs of %glarch. To make a list reference that does I<NOT> interpolate, see L<perlref>. The null list is represented by (). Interpolating it in a list @@ -459,25 +538,26 @@ put the list in parentheses to avoid ambiguity. For example: # A "reverse comma operator". return (pop(@foo),pop(@foo))[0]; -You may assign to C<undef> in a list. 
This is useful for throwing -away some of the return values of a function: - - ($dev, $ino, undef, undef, $uid, $gid) = stat($file); - -Lists may be assigned to if and only if each element of the list -is legal to assign to: +Lists may be assigned to only when each element of the list +is itself legal to assign to: ($a, $b, $c) = (1, 2, 3); ($map{'red'}, $map{'blue'}, $map{'green'}) = (0x00f, 0x0f0, 0xf00); -List assignment in a scalar context returns the number of elements +An exception to this is that you may assign to C<undef> in a list. +This is useful for throwing away some of the return values of a +function: + + ($dev, $ino, undef, undef, $uid, $gid) = stat($file); + +List assignment in scalar context returns the number of elements produced by the expression on the right side of the assignment: $x = (($foo,$bar) = (3,2,1)); # set $x to 3, not 2 $x = (($foo,$bar) = f()); # set $x to f()'s return count -This is very handy when you want to do a list assignment in a Boolean +This is handy when you want to do a list assignment in a Boolean context, because most list functions return a null list when finished, which when assigned produces a 0, which is interpreted as FALSE. @@ -487,24 +567,24 @@ The final element may be an array or a hash: my($a, $b, %rest) = @_; You can actually put an array or hash anywhere in the list, but the first one -in the list will soak up all the values, and anything after it will get -a null value. This may be useful in a local() or my(). +in the list will soak up all the values, and anything after it will become +undefined. This may be useful in a my() or local(). 
-A hash literal contains pairs of values to be interpreted -as a key and a value: +A hash can be initialized using a literal list holding pairs of +items to be interpreted as a key and a value: # same as map assignment above %map = ('red',0x00f,'blue',0x0f0,'green',0xf00); -While literal lists and named arrays are usually interchangeable, that's +While literal lists and named arrays are often interchangeable, that's not the case for hashes. Just because you can subscript a list value like a normal array does not mean that you can subscript a list value as a hash. Likewise, hashes included as parts of other lists (including parameters lists and return lists from functions) always flatten out into key/value pairs. That's why it's good to use references sometimes. -It is often more readable to use the C<=E<gt>> operator between key/value -pairs. The C<=E<gt>> operator is mostly just a more visually distinctive +It is often more readable to use the C<< => >> operator between key/value +pairs. The C<< => >> operator is mostly just a more visually distinctive synonym for a comma, but it also arranges for its left-hand operand to be interpreted as a string--if it's a bareword that would be a legal identifier. This makes it nice for initializing hashes: @@ -537,6 +617,82 @@ Note that just because a hash is initialized in that order doesn't mean that it comes out in that order. See L<perlfunc/sort> for examples of how to arrange for an output ordering. +=head2 Slices + +A common way to access an array or a hash is one scalar element at a +time. You can also subscript a list to get a single element from it. + + $whoami = $ENV{"USER"}; # one element from the hash + $parent = $ISA[0]; # one element from the array + $dir = (getpwnam("daemon"))[7]; # likewise, but with list + +A slice accesses several elements of a list, an array, or a hash +simultaneously using a list of subscripts. 
It's more convenient +than writing out the individual elements as a list of separate +scalar values. + + ($him, $her) = @folks[0,-1]; # array slice + @them = @folks[0 .. 3]; # array slice + ($who, $home) = @ENV{"USER", "HOME"}; # hash slice + ($uid, $dir) = (getpwnam("daemon"))[2,7]; # list slice + +Since you can assign to a list of variables, you can also assign to +an array or hash slice. + + @days[3..5] = qw/Wed Thu Fri/; + @colors{'red','blue','green'} + = (0xff0000, 0x0000ff, 0x00ff00); + @folks[0, -1] = @folks[-1, 0]; + +The previous assignments are exactly equivalent to + + ($days[3], $days[4], $days[5]) = qw/Wed Thu Fri/; + ($colors{'red'}, $colors{'blue'}, $colors{'green'}) + = (0xff0000, 0x0000ff, 0x00ff00); + ($folks[0], $folks[-1]) = ($folks[-1], $folks[0]); + +Since changing a slice changes the original array or hash that it's +slicing, a C<foreach> construct will alter some--or even all--of the +values of the array or hash. + + foreach (@array[ 4 .. 10 ]) { s/peter/paul/ } + + foreach (@hash{keys %hash}) { + s/^\s+//; # trim leading whitespace + s/\s+$//; # trim trailing whitespace + s/(\w+)/\u\L$1/g; # "titlecase" words + } + +A slice of an empty list is still an empty list. Thus: + + @a = ()[1,0]; # @a has no elements + @b = (@a)[0,1]; # @b has no elements + @c = (0,1)[2,3]; # @c has no elements + +But: + + @a = (1)[1,0]; # @a has two elements + @b = (1,undef)[1,0,2]; # @b has three elements + +This makes it easy to write loops that terminate when a null list +is returned: + + while ( ($home, $user) = (getpwent)[7,0]) { + printf "%-8s %s\n", $user, $home; + } + +As noted earlier in this document, the scalar sense of list assignment +is the number of elements on the right-hand side of the assignment. +The null list contains no elements, so when the password file is +exhausted, the result is 0, not 2. + +If you're confused about why you use an '@' there on a hash slice +instead of a '%', think of it like this.
The type of bracket (square +or curly) governs whether it's an array or a hash being looked at. +On the other hand, the leading symbol ('$' or '@') on the array or +hash indicates whether you are getting back a singular value (a +scalar) or a plural one (a list). + =head2 Typeglobs and Filehandles Perl uses an internal type called a I<typeglob> to hold an entire @@ -562,7 +718,7 @@ make @Here::blue an alias for @There::green, or %Here::blue an alias for of this. Strange though this may seem, this is the basis for the whole module import/export system. -Another use for typeglobs is to to pass filehandles into a function or +Another use for typeglobs is to pass filehandles into a function or to create new filehandles. If you need to use a typeglob to save away a filehandle, do it this way: @@ -581,23 +737,50 @@ For example: sub newopen { my $path = shift; - local *FH; # not my! + local *FH; # not my! open (FH, $path) or return undef; return *FH; } $fh = newopen('/etc/passwd'); -Now that we have the *foo{THING} notation, typeglobs aren't used as much +Now that we have the C<*foo{THING}> notation, typeglobs aren't used as much for filehandle manipulations, although they're still needed to pass brand new file and directory handles into or out of functions. That's because -*HANDLE{IO} only works if HANDLE has already been used as a handle. -In other words, *FH can be used to create new symbol table entries, -but *foo{THING} cannot. +C<*HANDLE{IO}> only works if HANDLE has already been used as a handle. +In other words, C<*FH> must be used to create new symbol table entries; +C<*foo{THING}> cannot. When in doubt, use C<*FH>. + +All functions that are capable of creating filehandles (open(), +opendir(), pipe(), socketpair(), sysopen(), socket(), and accept()) +automatically create an anonymous filehandle if the handle passed to +them is an uninitialized scalar variable. 
This allows the constructs +such as C<open(my $fh, ...)> and C<open(local $fh,...)> to be used to +create filehandles that will conveniently be closed automatically when +the scope ends, provided there are no other references to them. This +largely eliminates the need for typeglobs when opening filehandles +that must be passed around, as in the following example: + + sub myopen { + open my $fh, "@_" + or die "Can't open '@_': $!"; + return $fh; + } + + { + my $f = myopen("</etc/motd"); + print <$f>; + # $f implicitly closed here + } + +Another way to create anonymous filehandles is with the Symbol +module or with the IO::Handle module and its ilk. These modules +have the advantage of not hiding different types of the same name +during the local(). See the bottom of L<perlfunc/open()> for an +example. -Another way to create anonymous filehandles is with the IO::Handle -module and its ilk. These modules have the advantage of not hiding -different types of the same name during the local(). See the bottom of -L<perlfunc/open()> for an example. +=head1 SEE ALSO -See L<perlref>, L<perlsub>, and L<perlmod/"Symbol Tables"> for more -discussion on typeglobs and the *foo{THING} syntax. +See L<perlvar> for a description of Perl's built-in variables and +a discussion of legal variable names. See L<perlref>, L<perlsub>, +and L<perlmod/"Symbol Tables"> for more discussion on typeglobs and +the C<*foo{THING}> syntax. diff --git a/contrib/perl5/pod/perldbmfilter.pod b/contrib/perl5/pod/perldbmfilter.pod new file mode 100644 index 0000000..3350596 --- /dev/null +++ b/contrib/perl5/pod/perldbmfilter.pod @@ -0,0 +1,168 @@ +=head1 NAME + +perldbmfilter - Perl DBM Filters + +=head1 SYNOPSIS + + $db = tie %hash, 'DBM', ... + + $old_filter = $db->filter_store_key ( sub { ... } ) ; + $old_filter = $db->filter_store_value( sub { ... } ) ; + $old_filter = $db->filter_fetch_key ( sub { ... } ) ; + $old_filter = $db->filter_fetch_value( sub { ... 
} ) ; +=head1 DESCRIPTION + +The four C<filter_*> methods shown above are available in all the DBM +modules that ship with Perl, namely DB_File, GDBM_File, NDBM_File, +ODBM_File and SDBM_File. + +Each of the methods works identically, and is used to install (or +uninstall) a single DBM Filter. The only difference between them is the +place that the filter is installed. + +To summarise: + +=over 5 + +=item B<filter_store_key> + +If a filter has been installed with this method, it will be invoked +every time you write a key to a DBM database. + +=item B<filter_store_value> + +If a filter has been installed with this method, it will be invoked +every time you write a value to a DBM database. + + +=item B<filter_fetch_key> + +If a filter has been installed with this method, it will be invoked +every time you read a key from a DBM database. + +=item B<filter_fetch_value> + +If a filter has been installed with this method, it will be invoked +every time you read a value from a DBM database. + +=back + +You can use any combination of the methods from none to all four. + +All filter methods return the existing filter, if present, or C<undef> +if not. + +To delete a filter pass C<undef> to it. + +=head2 The Filter + +When each filter is called by Perl, a local copy of C<$_> will contain +the key or value to be filtered. Filtering is achieved by modifying +the contents of C<$_>. The return code from the filter is ignored. + +=head2 An Example -- the NULL termination problem. + +DBM Filters are useful for a class of problems where you I<always> +want to make the same transformation to all keys, all values or both. + +For example, consider the following scenario. You have a DBM database +that you need to share with a third-party C application. The C application +assumes that I<all> keys and values are NULL terminated. Unfortunately +when Perl writes to DBM databases it doesn't use NULL termination, so +your Perl application will have to manage NULL termination itself.
When +you write to the database you will have to use something like this: + + $hash{"$key\0"} = "$value\0" ; + +Similarly the NULL needs to be taken into account when you are considering +the length of existing keys/values. + +It would be much better if you could ignore the NULL terminations issue +in the main application code and have a mechanism that automatically +added the terminating NULL to all keys and values whenever you write to +the database and have them removed when you read from the database. As I'm +sure you have already guessed, this is a problem that DBM Filters can +fix very easily. + + use strict ; + use warnings ; + use SDBM_File ; + use Fcntl ; + + my %hash ; + my $filename = "/tmp/filt" ; + unlink $filename ; + + my $db = tie(%hash, 'SDBM_File', $filename, O_RDWR|O_CREAT, 0640) + or die "Cannot open $filename: $!\n" ; + + # Install DBM Filters + $db->filter_fetch_key ( sub { s/\0$// } ) ; + $db->filter_store_key ( sub { $_ .= "\0" } ) ; + $db->filter_fetch_value( + sub { no warnings 'uninitialized' ;s/\0$// } ) ; + $db->filter_store_value( sub { $_ .= "\0" } ) ; + + $hash{"abc"} = "def" ; + my $a = $hash{"ABC"} ; + # ... + undef $db ; + untie %hash ; + +The code above uses SDBM_File, but it will work with any of the DBM +modules. + +Hopefully the contents of each of the filters should be +self-explanatory. Both "fetch" filters remove the terminating NULL, +and both "store" filters add a terminating NULL. + + +=head2 Another Example -- Key is a C int. + +Here is another real-life example. By default, whenever Perl writes to +a DBM database it always writes the key and value as strings. So when +you use this: + + $hash{12345} = "something" ; + +the key 12345 will get stored in the DBM database as the 5 byte string +"12345". If you actually want the key to be stored in the DBM database +as a C int, you will have to use C<pack> when writing, and C<unpack> +when reading.
+ +Here is a DBM Filter that does it: + + use strict ; + use warnings ; + use DB_File ; + my %hash ; + my $filename = "/tmp/filt" ; + unlink $filename ; + + + my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH + or die "Cannot open $filename: $!\n" ; + + $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; + $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; + $hash{123} = "def" ; + # ... + undef $db ; + untie %hash ; + +The code above uses DB_File, but again it will work with any of the +DBM modules. + +This time only two filters have been used -- we only need to manipulate +the contents of the key, so it wasn't necessary to install any value +filters. + +=head1 SEE ALSO + +L<DB_File>, L<GDBM_File>, L<NDBM_File>, L<ODBM_File> and L<SDBM_File>. + +=head1 AUTHOR + +Paul Marquess + diff --git a/contrib/perl5/pod/perldebguts.pod b/contrib/perl5/pod/perldebguts.pod new file mode 100644 index 0000000..b74f3ef --- /dev/null +++ b/contrib/perl5/pod/perldebguts.pod @@ -0,0 +1,923 @@ +=head1 NAME + +perldebguts - Guts of Perl debugging + +=head1 DESCRIPTION + +This is not the perldebug(1) manpage, which tells you how to use +the debugger. This manpage describes low-level details ranging +between difficult and impossible for anyone who isn't incredibly +intimate with Perl's guts to understand. Caveat lector. + +=head1 Debugger Internals + +Perl has special debugging hooks at compile-time and run-time used +to create debugging environments. These hooks are not to be confused +with the I<perl -Dxxx> command described in L<perlrun>, which is +usable only if a special Perl is built per the instructions in the +F<INSTALL> podpage in the Perl source tree. + +For example, whenever you call Perl's built-in C<caller> function +from the package DB, the arguments that the corresponding stack +frame was called with are copied to the @DB::args array.
The +general mechanism is enabled by calling Perl with the B<-d> switch. Specifically, the +following additional features are enabled (cf. L<perlvar/$^P>): + +=over + +=item * + +Perl inserts the contents of C<$ENV{PERL5DB}> (or C<BEGIN {require +'perl5db.pl'}> if not present) before the first line of your program. + +=item * + +The array C<@{"_<$filename"}> holds the lines of $filename for all +files compiled by Perl. The same for C<eval>ed strings that contain +subroutines, or which are currently being executed. The $filename +for C<eval>ed strings looks like C<(eval 34)>. Code assertions +in regexes look like C<(re_eval 19)>. + +=item * + +The hash C<%{"_<$filename"}> contains breakpoints and actions keyed +by line number. Individual entries (as opposed to the whole hash) +are settable. Perl only cares about Boolean true here, although +the values used by F<perl5db.pl> have the form +C<"$break_condition\0$action">. Values in this hash are magical +in numeric context: they are zeros if the line is not breakable. + +The same holds for evaluated strings that contain subroutines, or +which are currently being executed. The $filename for C<eval>ed strings +looks like C<(eval 34)> or C<(re_eval 19)>. + +=item * + +The scalar C<${"_<$filename"}> contains C<"_<$filename">. This is +also the case for evaluated strings that contain subroutines, or +which are currently being executed. The $filename for C<eval>ed +strings looks like C<(eval 34)> or C<(re_eval 19)>. + +=item * + +After each C<require>d file is compiled, but before it is executed, +C<DB::postponed(*{"_<$filename"})> is called if the subroutine +C<DB::postponed> exists. Here, the $filename is the expanded name of +the C<require>d file, as found in the values of %INC. + +=item * + +After each subroutine C<subname> is compiled, the existence of +C<$DB::postponed{subname}> is checked. If this key exists, +C<DB::postponed(subname)> is called if the C<DB::postponed> subroutine +also exists.
+ +=item * + +A hash C<%DB::sub> is maintained, whose keys are subroutine names +and whose values have the form C<filename:startline-endline>. +C<filename> has the form C<(eval 34)> for subroutines defined inside +C<eval>s, or C<(re_eval 19)> for those within regex code assertions. + +=item * + +When the execution of your program reaches a point that can hold a +breakpoint, the C<DB::DB()> subroutine is called if any of the variables +$DB::trace, $DB::single, or $DB::signal is true. These variables +are not C<local>izable. This feature is disabled when executing +inside C<DB::DB()>, including functions called from it +unless C<< $^D & (1<<30) >> is true. + +=item * + +When execution of the program reaches a subroutine call, a call to +C<&DB::sub>(I<args>) is made instead, with C<$DB::sub> holding the +name of the called subroutine. (This doesn't happen if the subroutine +was compiled in the C<DB> package.) + +=back + +Note that if C<&DB::sub> needs external data for it to work, no +subroutine call is possible until this is done. For the standard +debugger, the C<$DB::deep> variable (how many levels of recursion +deep into the debugger you can go before a mandatory break) gives +an example of such a dependency. + +=head2 Writing Your Own Debugger + +The minimal working debugger consists of one line + + sub DB::DB {} + +which is quite handy as contents of C<PERL5DB> environment +variable: + + $ PERL5DB="sub DB::DB {}" perl -d your-script + +Another brief debugger, slightly more useful, could be created +with only the line: + + sub DB::DB {print ++$i; scalar <STDIN>} + +This debugger would print the sequential number of encountered +statement, and would wait for you to hit a newline before continuing. + +The following debugger is quite functional: + + { + package DB; + sub DB {} + sub sub {print ++$i, " $sub\n"; &$sub} + } + +It prints the sequential number of subroutine call and the name of the +called subroutine.
Note that C<&DB::sub> should be compiled into the +package C<DB>. + +At the start, the debugger reads your rc file (F<./.perldb> or +F<~/.perldb> under Unix), which can set important options. This file may +define a subroutine C<&afterinit> to be executed after the debugger is +initialized. + +After the rc file is read, the debugger reads the PERLDB_OPTS +environment variable and parses this as the remainder of a C<O ...> +line as one might enter at the debugger prompt. + +The debugger also maintains magical internal variables, such as +C<@DB::dbline>, C<%DB::dbline>, which are aliases for +C<@{"::_<current_file"}> and C<%{"::_<current_file"}>. Here C<current_file> +is the currently selected file, either explicitly chosen with the +debugger's C<f> command, or implicitly by flow of execution. + +Some functions are provided to simplify customization. See +L<perldebug/"Options"> for description of options parsed by +C<DB::parse_options(string)>. The function C<DB::dump_trace(skip[, +count])> skips the specified number of frames and returns a list +containing information about the calling frames (all of them, if +C<count> is missing). Each entry is a reference to a hash with +keys C<context> (either C<.>, C<$>, or C<@>), C<sub> (subroutine +name, or info about C<eval>), C<args> (C<undef> or a reference to +an array), C<file>, and C<line>. + +The function C<DB::print_trace(FH, skip[, count[, short]])> prints +formatted info about caller frames. The last two functions may be +convenient as arguments to C<< < >>, C<< << >> commands. + +Note that any variables and functions that are not documented in +this manpage (or in L<perldebug>) are considered for internal +use only, and as such are subject to change without notice. + +=head1 Frame Listing Output Examples + +The C<frame> option can be used to control the output of frame +information. For example, contrast this expression trace: + + $ perl -de 42 + Stack dump during die enabled outside of evals.
+ + Loading DB routines from perl5db.pl patch level 0.94 + Emacs support available. + + Enter h or `h h' for help. + + main::(-e:1): 0 + DB<1> sub foo { 14 } + + DB<2> sub bar { 3 } + + DB<3> t print foo() * bar() + main::((eval 172):3): print foo() + bar(); + main::foo((eval 168):2): + main::bar((eval 170):2): + 42 + +with this one, once the C<O>ption C<frame=2> has been set: + + DB<4> O f=2 + frame = '2' + DB<5> t print foo() * bar() + 3: foo() * bar() + entering main::foo + 2: sub foo { 14 }; + exited main::foo + entering main::bar + 2: sub bar { 3 }; + exited main::bar + 42 + +By way of demonstration, we present below a laborious listing +resulting from setting your C<PERLDB_OPTS> environment variable to +the value C<f=n N>, and running I<perl -d -V> from the command line. +Examples using various values of C<n> are shown to give you a feel +for the difference between settings. Long though it may be, this +is not a complete listing, but only excerpts. + +=over 4 + +=item 1 + + entering main::BEGIN + entering Config::BEGIN + Package lib/Exporter.pm. + Package lib/Carp.pm. + Package lib/Config.pm. + entering Config::TIEHASH + entering Exporter::import + entering Exporter::export + entering Config::myconfig + entering Config::FETCH + entering Config::FETCH + entering Config::FETCH + entering Config::FETCH + +=item 2 + + entering main::BEGIN + entering Config::BEGIN + Package lib/Exporter.pm. + Package lib/Carp.pm. + exited Config::BEGIN + Package lib/Config.pm. + entering Config::TIEHASH + exited Config::TIEHASH + entering Exporter::import + entering Exporter::export + exited Exporter::export + exited Exporter::import + exited main::BEGIN + entering Config::myconfig + entering Config::FETCH + exited Config::FETCH + entering Config::FETCH + exited Config::FETCH + entering Config::FETCH + +=item 4 + + in $=main::BEGIN() from /dev/null:0 + in $=Config::BEGIN() from lib/Config.pm:2 + Package lib/Exporter.pm. + Package lib/Carp.pm. + Package lib/Config.pm.
+ in $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from li + in @=Config::myconfig() from /dev/null:0 + in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PERL_VERSION') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PERL_SUBVERSION') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'osname') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'osvers') from lib/Config.pm:574 + +=item 6 + + in $=main::BEGIN() from /dev/null:0 + in $=Config::BEGIN() from lib/Config.pm:2 + Package lib/Exporter.pm. + Package lib/Carp.pm. + out $=Config::BEGIN() from lib/Config.pm:0 + Package lib/Config.pm. + in $=Config::TIEHASH('Config') from lib/Config.pm:644 + out $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + out $=main::BEGIN() from /dev/null:0 + in @=Config::myconfig() from /dev/null:0 + in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PERL_VERSION') from lib/Config.pm:574 + out $=Config::FETCH(ref(Config), 'PERL_VERSION') from lib/Config.pm:574 + in $=Config::FETCH(ref(Config), 'PERL_SUBVERSION') from lib/Config.pm:574 + +=item 14 + + in $=main::BEGIN() from /dev/null:0 + in $=Config::BEGIN() from lib/Config.pm:2 + 
Package lib/Exporter.pm. + Package lib/Carp.pm. + out $=Config::BEGIN() from lib/Config.pm:0 + Package lib/Config.pm. + in $=Config::TIEHASH('Config') from lib/Config.pm:644 + out $=Config::TIEHASH('Config') from lib/Config.pm:644 + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + out $=main::BEGIN() from /dev/null:0 + in @=Config::myconfig() from /dev/null:0 + in $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 + out $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 + in $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 + out $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 + +=item 30 + + in $=CODE(0x15eca4)() from /dev/null:0 + in $=CODE(0x182528)() from lib/Config.pm:2 + Package lib/Exporter.pm. + out $=CODE(0x182528)() from lib/Config.pm:0 + scalar context return from CODE(0x182528): undef + Package lib/Config.pm. + in $=Config::TIEHASH('Config') from lib/Config.pm:628 + out $=Config::TIEHASH('Config') from lib/Config.pm:628 + scalar context return from Config::TIEHASH: empty hash + in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 + out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 + scalar context return from Exporter::export: '' + out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 + scalar context return from Exporter::import: '' + +=back + +In all cases shown above, the line indentation shows the call tree. +If bit 2 of C<frame> is set, a line is printed on exit from a +subroutine as well. 
If bit 4 is set, the arguments are printed +along with the caller info. If bit 8 is set, the arguments are +printed even if they are tied or references. If bit 16 is set, the +return value is printed, too. + +When a package is compiled, a line like this + + Package lib/Carp.pm. + +is printed with proper indentation. + +=head1 Debugging regular expressions + +There are two ways to enable debugging output for regular expressions. + +If your perl is compiled with C<-DDEBUGGING>, you may use the +B<-Dr> flag on the command line. + +Otherwise, one can C<use re 'debug'>, which has effects at +compile time and run time. It is not lexically scoped. + +=head2 Compile-time output + +The debugging output at compile time looks like this: + + compiling RE `[bc]d(ef*g)+h[ij]k$' + size 43 first at 1 + 1: ANYOF(11) + 11: EXACT <d>(13) + 13: CURLYX {1,32767}(27) + 15: OPEN1(17) + 17: EXACT <e>(19) + 19: STAR(22) + 20: EXACT <f>(0) + 22: EXACT <g>(24) + 24: CLOSE1(26) + 26: WHILEM(0) + 27: NOTHING(28) + 28: EXACT <h>(30) + 30: ANYOF(40) + 40: EXACT <k>(42) + 42: EOL(43) + 43: END(0) + anchored `de' at 1 floating `gh' at 3..2147483647 (checking floating) + stclass `ANYOF' minlen 7 + +The first line shows the pre-compiled form of the regex. The second +shows the size of the compiled form (in arbitrary units, usually +4-byte words) and the label I<id> of the first node that does a +match. + +The last line (split into two lines above) contains optimizer +information. In the example shown, the optimizer found that the match +should contain a substring C<de> at offset 1, plus substring C<gh> +at some offset between 3 and infinity. Moreover, when checking for +these substrings (to abandon impossible matches quickly), Perl will check +for the substring C<gh> before checking for the substring C<de>. The +optimizer may also use the knowledge that the match starts (at the +C<first> I<id>) with a character class, and the match cannot be +shorter than 7 chars. 
+ +The fields of interest which may appear in the last line are + +=over + +=item C<anchored> I<STRING> C<at> I<POS> + +=item C<floating> I<STRING> C<at> I<POS1..POS2> + +See above. + +=item C<matching floating/anchored> + +Which substring to check first. + +=item C<minlen> + +The minimal length of the match. + +=item C<stclass> I<TYPE> + +Type of first matching node. + +=item C<noscan> + +Don't scan for the found substrings. + +=item C<isall> + +Means that the optimizer info is all that the regular +expression contains, and thus one does not need to enter the regex engine at +all. + +=item C<GPOS> + +Set if the pattern contains C<\G>. + +=item C<plus> + +Set if the pattern starts with a repeated char (as in C<x+y>). + +=item C<implicit> + +Set if the pattern starts with C<.*>. + +=item C<with eval> + +Set if the pattern contains eval-groups, such as C<(?{ code })> and +C<(??{ code })>. + +=item C<anchored(TYPE)> + +If the pattern may match only at a handful of places (with C<TYPE> +being C<BOL>, C<MBOL>, or C<GPOS>). See the table below. + +=back + +If a substring is known to match at end-of-line only, it may be +followed by C<$>, as in C<floating `k'$>. + +The optimizer-specific info is used to avoid entering (a slow) regex +engine on strings that definitely will not match. If the C<isall> flag +is set, a call to the regex engine may be avoided even when the optimizer +found an appropriate place for the match. + +The rest of the output contains the list of I<nodes> of the compiled +form of the regex. Each line has format + +C< >I<id>: I<TYPE> I<OPTIONAL-INFO> (I<next-id>) + +=head2 Types of nodes + +Here are the possible types, with short descriptions: + + # TYPE arg-description [num-args] [longjump-len] DESCRIPTION + + # Exit points + END no End of program. + SUCCEED no Return from a subroutine, basically. + + # Anchors: + BOL no Match "" at beginning of line. + MBOL no Same, assuming multiline. + SBOL no Same, assuming singleline. + EOS no Match "" at end of string.
+ EOL no Match "" at end of line. + MEOL no Same, assuming multiline. + SEOL no Same, assuming singleline. + BOUND no Match "" at any word boundary + BOUNDL no Match "" at any word boundary + NBOUND no Match "" at any word non-boundary + NBOUNDL no Match "" at any word non-boundary + GPOS no Matches where last m//g left off. + + # [Special] alternatives + ANY no Match any one character (except newline). + SANY no Match any one character. + ANYOF sv Match character in (or not in) this class. + ALNUM no Match any alphanumeric character + ALNUML no Match any alphanumeric char in locale + NALNUM no Match any non-alphanumeric character + NALNUML no Match any non-alphanumeric char in locale + SPACE no Match any whitespace character + SPACEL no Match any whitespace char in locale + NSPACE no Match any non-whitespace character + NSPACEL no Match any non-whitespace char in locale + DIGIT no Match any numeric character + NDIGIT no Match any non-numeric character + + # BRANCH The set of branches constituting a single choice are hooked + # together with their "next" pointers, since precedence prevents + # anything being concatenated to any individual branch. The + # "next" pointer of the last BRANCH in a choice points to the + # thing following the whole choice. This is also where the + # final "next" pointer of each individual branch points; each + # branch starts with the operand node of a BRANCH node. + # + BRANCH node Match this alternative, or the next... + + # BACK Normal "next" pointers all implicitly point forward; BACK + # exists to make loop structures possible. + # not used + BACK no Match "", "next" ptr points backward. + + # Literals + EXACT sv Match this string (preceded by length). + EXACTF sv Match this string, folded (prec. by length). + EXACTFL sv Match this string, folded in locale (w/len). + + # Do nothing + NOTHING no Match empty string. + # A variant of above which delimits a group, thus stops optimizations + TAIL no Match empty string. 
Can jump here from outside. + + # STAR,PLUS '?', and complex '*' and '+', are implemented as circular + # BRANCH structures using BACK. Simple cases (one character + # per match) are implemented with STAR and PLUS for speed + # and to minimize recursive plunges. + # + STAR node Match this (simple) thing 0 or more times. + PLUS node Match this (simple) thing 1 or more times. + + CURLY sv 2 Match this simple thing {n,m} times. + CURLYN no 2 Match next-after-this simple thing + # {n,m} times, set parens. + CURLYM no 2 Match this medium-complex thing {n,m} times. + CURLYX sv 2 Match this complex thing {n,m} times. + + # This terminator creates a loop structure for CURLYX + WHILEM no Do curly processing and see if rest matches. + + # OPEN,CLOSE,GROUPP ...are numbered at compile time. + OPEN num 1 Mark this point in input as start of #n. + CLOSE num 1 Analogous to OPEN. + + REF num 1 Match some already matched string + REFF num 1 Match already matched string, folded + REFFL num 1 Match already matched string, folded in loc. + + # grouping assertions + IFMATCH off 1 2 Succeeds if the following matches. + UNLESSM off 1 2 Fails if the following matches. + SUSPEND off 1 1 "Independent" sub-regex. + IFTHEN off 1 1 Switch, should be preceded by switcher . + GROUPP num 1 Whether the group matched. + + # Support for long regex + LONGJMP off 1 1 Jump far away. + BRANCHJ off 1 1 BRANCH with long offset. + + # The heavy worker + EVAL evl 1 Execute some Perl code. + + # Modifiers + MINMOD no Next operator is not greedy. + LOGICAL no Next opcode should set the flag only. + + # This is not used yet + RENUM off 1 1 Group with independently numbered parens. + + # This is not really a node, but an optimized away piece of a "long" node. + # To simplify debugging output, we mark it as if it were a node + OPTIMIZED off Placeholder for dump. + +=head2 Run-time output + +First of all, when doing a match, one may get no run-time output even +if debugging is enabled. 
This means that the regex engine was never +entered and that all of the job was therefore done by the optimizer. + +If the regex engine was entered, the output may look like this: + + Matching `[bc]d(ef*g)+h[ij]k$' against `abcdefg__gh__' + Setting an EVAL scope, savestack=3 + 2 <ab> <cdefg__gh_> | 1: ANYOF + 3 <abc> <defg__gh_> | 11: EXACT <d> + 4 <abcd> <efg__gh_> | 13: CURLYX {1,32767} + 4 <abcd> <efg__gh_> | 26: WHILEM + 0 out of 1..32767 cc=effff31c + 4 <abcd> <efg__gh_> | 15: OPEN1 + 4 <abcd> <efg__gh_> | 17: EXACT <e> + 5 <abcde> <fg__gh_> | 19: STAR + EXACT <f> can match 1 times out of 32767... + Setting an EVAL scope, savestack=3 + 6 <bcdef> <g__gh__> | 22: EXACT <g> + 7 <bcdefg> <__gh__> | 24: CLOSE1 + 7 <bcdefg> <__gh__> | 26: WHILEM + 1 out of 1..32767 cc=effff31c + Setting an EVAL scope, savestack=12 + 7 <bcdefg> <__gh__> | 15: OPEN1 + 7 <bcdefg> <__gh__> | 17: EXACT <e> + restoring \1 to 4(4)..7 + failed, try continuation... + 7 <bcdefg> <__gh__> | 27: NOTHING + 7 <bcdefg> <__gh__> | 28: EXACT <h> + failed... + failed... + +The most significant information in the output is about the particular I<node> +of the compiled regex that is currently being tested against the target string. +The format of these lines is + +C< >I<STRING-OFFSET> <I<PRE-STRING>> <I<POST-STRING>> |I<ID>: I<TYPE> + +The I<TYPE> info is indented with respect to the backtracking level. +Other incidental information appears interspersed within. + +=head1 Debugging Perl memory usage + +Perl is a profligate wastrel when it comes to memory use. There +is a saying that to estimate memory usage of Perl, assume a reasonable +algorithm for memory allocation, multiply that estimate by 10, and +while you still may miss the mark, at least you won't be quite so +astonished. This is not absolutely true, but may provide a good +grasp of what happens. 
+ +Assume that an integer cannot take less than 20 bytes of memory, a +float cannot take less than 24 bytes, a string cannot take less +than 32 bytes (all these examples assume 32-bit architectures, the +results are quite a bit worse on 64-bit architectures). If a variable +is accessed in two of three different ways (which require an integer, +a float, or a string), the memory footprint may increase yet another +20 bytes. A sloppy malloc(3) implementation can inflate these +numbers dramatically. + +On the opposite end of the scale, a declaration like + + sub foo; + +may take up to 500 bytes of memory, depending on which release of Perl +you're running. + +Anecdotal estimates of source-to-compiled code bloat suggest an +eightfold increase. This means that the compiled form of reasonable +(normally commented, properly indented etc.) code will take +about eight times more space in memory than the code took +on disk. + +There are two Perl-specific ways to analyze memory usage: +$ENV{PERL_DEBUG_MSTATS} and B<-DL> command-line switch. The first +is available only if Perl is compiled with Perl's malloc(); the +second only if Perl was built with C<-DDEBUGGING>. See the +instructions for how to do this in the F<INSTALL> podpage at +the top level of the Perl source tree. + +=head2 Using C<$ENV{PERL_DEBUG_MSTATS}> + +If your perl is using Perl's malloc() and was compiled with the +necessary switches (this is the default), then it will print memory +usage statistics after compiling your code when C<< $ENV{PERL_DEBUG_MSTATS} +> 1 >>, and before termination of the program when C<< +$ENV{PERL_DEBUG_MSTATS} >= 1 >>. The report format is similar to +the following example: + + $ PERL_DEBUG_MSTATS=2 perl -e "require Carp" + Memory allocation statistics after compilation: (buckets 4(4)..8188(8192) + 14216 free: 130 117 28 7 9 0 2 2 1 0 0 + 437 61 36 0 5 + 60924 used: 125 137 161 55 7 8 6 16 2 0 1 + 74 109 304 84 20 + Total sbrk(): 77824/21:119. 
Odd ends: pad+heads+chain+tail: 0+636+0+2048. + Memory allocation statistics after execution: (buckets 4(4)..8188(8192) + 30888 free: 245 78 85 13 6 2 1 3 2 0 1 + 315 162 39 42 11 + 175816 used: 265 176 1112 111 26 22 11 27 2 1 1 + 196 178 1066 798 39 + Total sbrk(): 215040/47:145. Odd ends: pad+heads+chain+tail: 0+2192+0+6144. + +It is possible to ask for such a statistic at arbitrary points in +your execution using the mstats() function out of the standard +Devel::Peek module. + +Here is some explanation of that format: + +=over + +=item C<buckets SMALLEST(APPROX)..GREATEST(APPROX)> + +Perl's malloc() uses bucketed allocations. Every request is rounded +up to the closest bucket size available, and a bucket is taken from +the pool of buckets of that size. + +The line above describes the limits of buckets currently in use. +Each bucket has two sizes: memory footprint and the maximal size +of user data that can fit into this bucket. Suppose in the above +example that the smallest bucket were size 4. The biggest bucket +would have usable size 8188, and the memory footprint would be 8192. + +In a Perl built for debugging, some buckets may have negative usable +size. This means that these buckets cannot (and will not) be used. +For larger buckets, the memory footprint may be one page greater +than a power of 2. If so, the corresponding power of two is +printed in the C<APPROX> field above. + +=item Free/Used + +The 1 or 2 rows of numbers following that correspond to the number +of buckets of each size between C<SMALLEST> and C<GREATEST>. In +the first row, the sizes (memory footprints) of buckets are powers +of two--or possibly one page greater. In the second row, if present, +the memory footprints of the buckets are between the memory footprints +of two buckets "above". 
+ +For example, suppose under the previous example, the memory footprints +were + + free: 8 16 32 64 128 256 512 1024 2048 4096 8192 + 4 12 24 48 80 + +With non-C<DEBUGGING> perl, the buckets starting from C<128> have +a 4-byte overhead, and thus an 8192-long bucket may take up to +8188-byte allocations. + +=item C<Total sbrk(): SBRKed/SBRKs:CONTINUOUS> + +The first two fields give the total amount of memory perl sbrk(2)ed +(ess-broken? :-) and number of sbrk(2)s used. The third number is +what perl thinks about continuity of returned chunks. So long as +this number is positive, malloc() will assume that it is probable +that sbrk(2) will provide continuous memory. + +Memory allocated by external libraries is not counted. + +=item C<pad: 0> + +The amount of sbrk(2)ed memory needed to keep buckets aligned. + +=item C<heads: 2192> + +Although memory overhead of bigger buckets is kept inside the bucket, for +smaller buckets, it is kept in separate areas. This field gives the +total size of these areas. + +=item C<chain: 0> + +malloc() may want to subdivide a bigger bucket into smaller buckets. +If only a part of the deceased bucket is left unsubdivided, the rest +is kept as an element of a linked list. This field gives the total +size of these chunks. + +=item C<tail: 6144> + +To minimize the number of sbrk(2)s, malloc() asks for more memory. This +field gives the size of the yet unused part, which is sbrk(2)ed, but +never touched. + +=back + +=head2 Example of using B<-DL> switch + +Below we show how to analyse memory usage by + + do 'lib/auto/POSIX/autosplit.ix'; + +The file in question contains a header and 146 lines similar to + + sub getcwd; + +B<WARNING>: The discussion below supposes 32-bit architecture. In +newer releases of Perl, memory usage of the constructs discussed +here is greatly improved, but the story discussed below is a real-life +story. This story is mercilessly terse, and assumes rather more than cursory +knowledge of Perl internals. 
Type space to continue, `q' to quit. +(Actually, you just want to skip to the next section.) + +Here is the itemized list of Perl allocations performed during parsing +of this file: + + !!! "after" at test.pl line 3. + Id subtot 4 8 12 16 20 24 28 32 36 40 48 56 64 72 80 80+ + 0 02 13752 . . . . 294 . . . . . . . . . . 4 + 0 54 5545 . . 8 124 16 . . . 1 1 . . . . . 3 + 5 05 32 . . . . . . . 1 . . . . . . . . + 6 02 7152 . . . . . . . . . . 149 . . . . . + 7 02 3600 . . . . . 150 . . . . . . . . . . + 7 03 64 . -1 . 1 . . 2 . . . . . . . . . + 7 04 7056 . . . . . . . . . . . . . . . 7 + 7 17 38404 . . . . . . . 1 . . 442 149 . . 147 . + 9 03 2078 17 249 32 . . . . 2 . . . . . . . . + + +To see this list, insert two C<warn('!...')> statements around the call: + + warn('!'); + do 'lib/auto/POSIX/autosplit.ix'; + warn('!!! "after"'); + +and run it with Perl's B<-DL> option. The first warn() will print +memory allocation info before parsing the file and will memorize +the statistics at this point (we ignore what it prints). The second +warn() prints increments with respect to these memorized data. This +is the printout shown above. + +Different I<Id>s on the left correspond to different subsystems of +the perl interpreter. They are just the first argument given to +the perl memory allocation API named New(). To find what C<9 03> +means, just B<grep> the perl source for C<903>. You'll find it in +F<util.c>, function savepvn(). (I know, you wonder why we told you +to B<grep> and then gave away the answer. That's because grepping +the source is good for the soul.) This function is used to store +a copy of an existing chunk of memory. Using a C debugger, one can +see that the function was called either directly from gv_init() or +via sv_magic(), and that gv_init() is called from gv_fetchpv()--which +was itself called from newSUB(). Please stop to catch your breath now. 
+ +B<NOTE>: To reach this point in the debugger and skip the calls to +savepvn() during the compilation of the main program, you should +set a C breakpoint +in Perl_warn(), continue until this point is reached, and I<then> set +a C breakpoint in Perl_savepvn(). Note that you may need to skip a +handful of Perl_savepvn() calls that do not correspond to mass production +of CVs (there are more C<903> allocations than 146 similar lines of +F<lib/auto/POSIX/autosplit.ix>). Note also that C<Perl_> prefixes are +added by macroization code in perl header files to avoid conflicts +with external libraries. + +Anyway, we see that C<903> ids correspond to creation of globs, twice +per glob - for glob name, and glob stringification magic. + +Here are explanations for other I<Id>s above: + +=over + +=item C<717> + +Creates bigger C<XPV*> structures. In the case above, it +creates 3 C<AV>s per subroutine, one for a list of lexical variable +names, one for a scratchpad (which contains lexical variables and +C<targets>), and one for the array of scratchpads needed for +recursion. + +It also creates a C<GV> and a C<CV> per subroutine, all called from +start_subparse(). + +=item C<002> + +Creates a C array corresponding to the C<AV> of scratchpads and the +scratchpad itself. The first fake entry of this scratchpad is +created though the subroutine itself is not defined yet. + +It also creates C arrays to keep data for the stash. This is one HV, +but it grows; thus, there are 4 big allocations: the big chunks are not +freed, but are kept as additional arenas for C<SV> allocations. + +=item C<054> + +Creates a C<HEK> for the name of the glob for the subroutine. This +name is a key in a I<stash>. + +Big allocations with this I<Id> correspond to allocations of new +arenas to keep C<HE>. + +=item C<602> + +Creates a C<GP> for the glob for the subroutine. + +=item C<702> + +Creates the C<MAGIC> for the glob for the subroutine. + +=item C<704> + +Creates I<arenas> which keep SVs. 
+ +=back + +=head2 B<-DL> details + +If Perl is run with B<-DL> option, then warn()s that start with `!' +behave specially. They print a list of I<categories> of memory +allocations, and statistics of allocations of different sizes for +these categories. + +If warn() string starts with + +=over + +=item C<!!!> + +print changed categories only, print the differences in counts of allocations. + +=item C<!!> + +print grown categories only; print the absolute values of counts, and totals. + +=item C<!> + +print nonempty categories, print the absolute values of counts and totals. + +=back + +=head2 Limitations of B<-DL> statistics + +If an extension or external library does not use the Perl API to +allocate memory, such allocations are not counted. + +=head1 SEE ALSO + +L<perldebug>, +L<perlguts>, +L<perlrun>, +L<re>, +and +L<Devel::DProf>. diff --git a/contrib/perl5/pod/perldebug.pod b/contrib/perl5/pod/perldebug.pod index 760d517..c8ef60f 100644 --- a/contrib/perl5/pod/perldebug.pod +++ b/contrib/perl5/pod/perldebug.pod @@ -8,15 +8,6 @@ First of all, have you tried using the B<-w> switch? =head1 The Perl Debugger -"As soon as we started programming, we found to our -surprise that it wasn't as easy to get programs right -as we had thought. Debugging had to be discovered. -I can remember the exact instant when I realized that -a large part of my life from then on was going to be -spent in finding mistakes in my own programs." - -I< --Maurice Wilkes, 1949> - If you invoke Perl with the B<-d> switch, your script runs under the Perl source debugger. This works like an interactive Perl environment, prompting for debugger commands that let you examine @@ -25,14 +16,14 @@ variables, etc. This is so convenient that you often fire up the debugger all by itself just to test out Perl constructs interactively to see what they do. 
For example: - perl -d -e 42 + $ perl -d -e 42 -In Perl, the debugger is not a separate program as it usually is in the +In Perl, the debugger is not a separate program the way it usually is in the typical compiled environment. Instead, the B<-d> flag tells the compiler to insert source information into the parse trees it's about to hand off to the interpreter. That means your code must first compile correctly for the debugger to work on it. Then when the interpreter starts up, it -preloads a Perl library file containing the debugger itself. +preloads a special Perl library file containing the debugger. The program will halt I<right before> the first run-time executable statement (but see below regarding compile-time statements) and ask you @@ -41,12 +32,15 @@ the debugger halts and shows you a line of code, it always displays the line it's I<about> to execute, rather than the one it has just executed. Any command not recognized by the debugger is directly executed -(C<eval>'d) as Perl code in the current package. (The debugger uses the -DB package for its own state information.) +(C<eval>'d) as Perl code in the current package. (The debugger +uses the DB package for keeping its own state information.) -Leading white space before a command would cause the debugger to think -it's I<NOT> a debugger command but for Perl, so be careful not to do -that. +For any text entered at the debugger prompt, leading and trailing whitespace +is first stripped before further processing. If a debugger command +coincides with some function in your own program, merely precede the +function with something that doesn't look like a debugger command, such +as a leading C<;> or perhaps a C<+>, or by wrapping it with parentheses +or braces. =head2 Debugger Commands @@ -64,8 +58,8 @@ argument of C<h h> produces a more compact help listing, designed to fit together on one screen. 
If the output of the C<h> command (or any command, for that matter) scrolls -past your screen, either precede the command with a leading pipe symbol so -it's run through your pager, as in +past your screen, precede the command with a leading pipe symbol so +that it's run through your pager, as in DB> |h @@ -74,7 +68,7 @@ You may change the pager which is used via C<O pager=...> command. =item p expr Same as C<print {$DB::OUT} expr> in the current package. In particular, -because this is just Perl's own B<print> function, this means that nested +because this is just Perl's own C<print> function, this means that nested data structures and objects are not dumped, unlike with the C<x> command. The C<DB::OUT> filehandle is opened to F</dev/tty>, regardless of @@ -84,26 +78,25 @@ where STDOUT may be redirected to. Evaluates its expression in list context and dumps out the result in a pretty-printed fashion. Nested data structures are printed out -recursively, unlike the C<print> function. +recursively, unlike the real C<print> function in Perl. +See L<Dumpvalue> if you'd like to do this yourself. -The details of printout are governed by multiple C<O>ptions. +The output format is governed by multiple options described under +L<"Options">. =item V [pkg [vars]] -Display all (or some) variables in package (defaulting to the C<main> -package) using a data pretty-printer (hashes show their keys and values so -you see what's what, control characters are made printable, etc.). Make -sure you don't put the type specifier (like C<$>) there, just the symbol -names, like this: +Display all (or some) variables in package (defaulting to C<main>) +using a data pretty-printer (hashes show their keys and values so +you see what's what, control characters are made printable, etc.). +Make sure you don't put the type specifier (like C<$>) there, just +the symbol names, like this: V DB filename line -Use C<~pattern> and C<!pattern> for positive and negative regexps. 
- -Nested data structures are printed out in a legible fashion, unlike -the C<print> function. +Use C<~pattern> and C<!pattern> for positive and negative regexes. -The details of printout are governed by multiple C<O>ptions. +This is similar to calling the C<x> command on each applicable var. =item X [vars] @@ -115,18 +108,23 @@ Produce a stack backtrace. See below for details on its output. =item s [expr] -Single step. Executes until it reaches the beginning of another +Single step. Executes until the beginning of another statement, descending into subroutine calls. If an expression is supplied that includes function calls, it too will be single-stepped. =item n [expr] -Next. Executes over subroutine calls, until it reaches the beginning +Next. Executes over subroutine calls, until the beginning of the next statement. If an expression is supplied that includes function calls, those functions will be executed with stops before each statement. -=item E<lt>CRE<gt> +=item r + +Continue until the return from the current subroutine. +Dump the return value if the C<PrintRet> option is set (default). + +=item <CR> Repeat last C<n> or C<s> command. @@ -153,7 +151,8 @@ List a single line. =item l subname -List first window of lines from subroutine. +List first window of lines from subroutine. I<subname> may +be a variable that contains a code reference. =item - @@ -165,18 +164,24 @@ List window (a few lines) around the current line. =item . -Return debugger pointer to the last-executed line and -print it out. +Return the internal debugger pointer to the line last +executed, and print out that line. =item f filename -Switch to viewing a different file or eval statement. If C<filename> -is not a full filename as found in values of %INC, it is considered as -a regexp. +Switch to viewing a different file or C<eval> statement. If I<filename> +is not a full pathname found in the values of %INC, it is considered +a regex. 
+ +C<eval>ed strings (when accessible) are considered to be filenames: +C<f (eval 7)> and C<f eval 7\b> access the body of the 7th C<eval>ed string +(in the order of execution). The bodies of the currently executed C<eval> +and of C<eval>ed strings that define subroutines are saved and thus +accessible. =item /pattern/ -Search forwards for pattern; final / is optional. +Search forwards for pattern (a Perl regex); final / is optional. =item ?pattern? @@ -186,58 +191,27 @@ Search backwards for pattern; final ? is optional. List all breakpoints and actions. -=item S [[!]pattern] +=item S [[!]regex] -List subroutine names [not] matching pattern. +List subroutine names [not] matching the regex. =item t -Toggle trace mode (see also C<AutoTrace> C<O>ption). +Toggle trace mode (see also the C<AutoTrace> option). =item t expr -Trace through execution of expr. For example: - - $ perl -de 42 - Stack dump during die enabled outside of evals. - - Loading DB routines from perl5db.pl patch level 0.94 - Emacs support available. - - Enter h or `h h' for help. - - main::(-e:1): 0 - DB<1> sub foo { 14 } - - DB<2> sub bar { 3 } - - DB<3> t print foo() * bar() - main::((eval 172):3): print foo() + bar(); - main::foo((eval 168):2): - main::bar((eval 170):2): - 42 - -or, with the C<O>ption C<frame=2> set, - - DB<4> O f=2 - frame = '2' - DB<5> t print foo() * bar() - 3: foo() * bar() - entering main::foo - 2: sub foo { 14 }; - exited main::foo - entering main::bar - 2: sub bar { 3 }; - exited main::bar - 42 +Trace through execution of C<expr>. +See L<perldebguts/"Frame Listing Output Examples"> for examples. =item b [line] [condition] -Set a breakpoint. If line is omitted, sets a breakpoint on the line -that is about to be executed. If a condition is specified, it's -evaluated each time the statement is reached and a breakpoint is taken -only if the condition is true. Breakpoints may be set on only lines -that begin an executable statement. 
Conditions don't use B<if>: +Set a breakpoint before the given line. If I<line> is omitted, set a +breakpoint on the line about to be executed. If a condition +is specified, it's evaluated each time the statement is reached: a +breakpoint is taken only if the condition is true. Breakpoints may +only be set on lines that begin an executable statement. Conditions +don't use C<if>: b 237 $x > 30 b 237 ++$count237 < 11 @@ -245,26 +219,28 @@ that begin an executable statement. Conditions don't use B<if>: =item b subname [condition] -Set a breakpoint at the first line of the named subroutine. +Set a breakpoint before the first line of the named subroutine. I<subname> may +be a variable containing a code reference (in this case I<condition> +is not supported). =item b postpone subname [condition] -Set breakpoint at first line of subroutine after it is compiled. +Set a breakpoint at first line of subroutine after it is compiled. =item b load filename -Set breakpoint at the first executed line of the file. Filename should -be a full name as found in values of %INC. +Set a breakpoint before the first executed line of the I<filename>, +which should be a full pathname found amongst the %INC values. =item b compile subname -Sets breakpoint at the first statement executed after the subroutine -is compiled. +Sets a breakpoint before the first statement executed after the specified +subroutine is compiled. =item d [line] -Delete a breakpoint at the specified line. If line is omitted, deletes -the breakpoint on the line that is about to be executed. +Delete a breakpoint from the specified I<line>. If I<line> is omitted, deletes +the breakpoint from the line about to be executed. =item D @@ -272,7 +248,8 @@ Delete all installed breakpoints. =item a [line] command -Set an action to be done before the line is executed. +Set an action to be done before the line is executed. If I<line> is +omitted, set an action on the line about to be executed. 
The sequence of steps taken by the debugger is 1. check for a breakpoint at this line @@ -286,35 +263,224 @@ For example, this will print out $foo every time line a 53 print "DB FOUND $foo\n" +=item a [line] + +Delete an action from the specified line. If I<line> is omitted, delete +the action on the line that is about to be executed. + =item A Delete all installed actions. -=item W [expr] +=item W expr -Add a global watch-expression. +Add a global watch-expression. We hope you know what one of these +is, because they're supposed to be obvious. B<WARNING>: It is far +too easy to destroy your watch expressions by accidentally omitting +the I<expr>. =item W Delete all watch-expressions. -=item O [opt[=val]] [opt"val"] [opt?]... +=item O booloption ... + +Set each listed Boolean option to the value C<1>. + +=item O anyoption? ... + +Print out the value of one or more options. + +=item O option=value ... + +Set the value of one or more options. If the value has internal +whitespace, it should be quoted. For example, you could set C<O +pager="less -MQeicsNfr"> to call B<less> with those specific options. +You may use either single or double quotes, but if you do, you must +escape any embedded instances of the same sort of quote you began with, +as well as any escapes that immediately precede that +quote but which are not meant to escape the quote itself. In other +words, you follow single-quoting rules irrespective of the quote; +eg: C<O option='this isn\'t bad'> or C<O option="She said, \"Isn't +it?\"">. + +For historical reasons, the C<=value> is optional, but defaults to +1 only where it is safe to do so--that is, mostly for Boolean +options. It is always better to assign a specific value using C<=>. +The C<option> can be abbreviated, but for clarity probably should +not be. Several options can be set together. See L<"Options"> for +a list of these. + +=item < ? + +List out all pre-prompt Perl command actions. 
+ +=item < [ command ] + +Set an action (Perl command) to happen before every debugger prompt. +A multi-line command may be entered by backslashing the newlines. +B<WARNING> If C<command> is missing, all actions are wiped out! + +=item << command + +Add an action (Perl command) to happen before every debugger prompt. +A multi-line command may be entered by backwhacking the newlines. + +=item > ? + +List out post-prompt Perl command actions. + +=item > command + +Set an action (Perl command) to happen after the prompt when you've +just given a command to return to executing the script. A multi-line +command may be entered by backslashing the newlines (we bet you +couldn't've guessed this by now). B<WARNING> If C<command> is +missing, all actions are wiped out! + +=item >> command + +Adds an action (Perl command) to happen after the prompt when you've +just given a command to return to executing the script. A multi-line +command may be entered by slackbashing the newlines. + +=item { ? + +List out pre-prompt debugger commands. + +=item { [ command ] + +Set an action (debugger command) to happen before every debugger prompt. +A multi-line command may be entered in the customary fashion. +B<WARNING> If C<command> is missing, all actions are wiped out! + +Because this command is in some senses new, a warning is issued if +you appear to have accidentally entered a block instead. If that's +what you mean to do, write it as with C<;{ ... }> or even +C<do { ... }>. + +=item {{ command + +Add an action (debugger command) to happen before every debugger prompt. +A multi-line command may be entered, if you can guess how: see above. + +=item ! number + +Redo a previous command (defaults to the previous command). + +=item ! -number + +Redo number'th previous command. + +=item ! pattern + +Redo last command that started with pattern. +See C<O recallCommand>, too. + +=item !! cmd + +Run cmd in a subprocess (reads from DB::IN, writes to DB::OUT) See +C<O shellBang>, also. 
Note that the user's current shell (well, +their C<$ENV{SHELL}> variable) will be used, which can interfere +with proper interpretation of exit status or signal and coredump +information. + +=item H -number + +Display last n commands. Only commands longer than one character are +listed. If I<number> is omitted, list them all. + +=item q or ^D + +Quit. ("quit" doesn't work for this, unless you've made an alias) +This is the only supported way to exit the debugger, though typing +C<exit> twice might work. + +Set the C<inhibit_exit> option to 0 if you want to be able to step +off the end of the script. You may also need to set $finished to 0 +if you want to step through global destruction. + +=item R + +Restart the debugger by C<exec()>ing a new session. We try to maintain +your history across this, but internal settings and command-line options +may be lost. -Set or query values of options. val defaults to 1. opt can -be abbreviated. Several options can be listed. +The following settings are currently preserved: history, breakpoints, +actions, debugger options, and the Perl command-line +options B<-w>, B<-I>, and B<-e>. + +=item |dbcmd + +Run the debugger command, piping DB::OUT into your current pager. + +=item ||dbcmd + +Same as C<|dbcmd> but DB::OUT is temporarily C<select>ed as well. + +=item = [alias value] + +Define a command alias, like + + = quit q + +or list current aliases. + +=item command + +Execute command as a Perl statement. A trailing semicolon will be +supplied. If the Perl statement would otherwise be confused for a +Perl debugger command, use a leading semicolon, too. + +=item m expr + +List which methods may be called on the result of the evaluated +expression. The expression may evaluate to a reference to a +blessed object, or to a package name. + +=item man [manpage] + +Despite its name, this calls your system's default documentation +viewer on the given page, or on the viewer itself if I<manpage> is +omitted. 
If that viewer is B<man>, the current C<Config> information +is used to invoke B<man> using the proper MANPATH or S<B<-M> +I<manpath>> option. Failed lookups of the form C<XXX> that match +known manpages of the form I<perlXXX> will be retried. This lets +you type C<man debug> or C<man op> from the debugger. + +On systems traditionally bereft of a usable B<man> command, the +debugger invokes B<perldoc>. Occasionally this determination is +incorrect due to recalcitrant vendors or rather more felicitously, +to enterprising users. If you fall into either category, just +manually set the $DB::doccmd variable to whatever viewer to view +the Perl documentation on your system. This may be set in an rc +file, or through direct assignment. We're still waiting for a +working example of something along the lines of: + + $DB::doccmd = 'netscape -remote http://something.here/'; + +=back + +=head2 Configurable Options + +The debugger has numerous options settable using the C<O> command, +either interactively or from the environment or an rc file. =over 12 =item C<recallCommand>, C<ShellBang> The characters used to recall command or spawn shell. By -default, these are both set to C<!>. +default, both are set to C<!>, which is unfortunate. =item C<pager> -Program to use for output of pager-piped commands (those -beginning with a C<|> character.) By default, -C<$ENV{PAGER}> will be used. +Program to use for output of pager-piped commands (those beginning +with a C<|> character.) By default, C<$ENV{PAGER}> will be used. +Because the debugger uses your current terminal characteristics +for bold and underlining, if the chosen pager does not pass escape +sequences through unchanged, the output of some debugger commands +will not be readable when sent through the pager. =item C<tkRunning> @@ -322,14 +488,23 @@ Run Tk while prompting (with ReadLine). =item C<signalLevel>, C<warnLevel>, C<dieLevel> -Level of verbosity. 
By default the debugger is in a sane verbose mode, -thus it will print backtraces on all the warnings and die-messages -which are going to be printed out, and will print a message when -interesting uncaught signals arrive. - -To disable this behaviour, set these values to 0. If C<dieLevel> is 2, -then the messages which will be caught by surrounding C<eval> are also -printed. +Level of verbosity. By default, the debugger leaves your exceptions +and warnings alone, because altering them can break correctly running +programs. It will attempt to print a message when uncaught INT, BUS, or +SEGV signals arrive. (But see the mention of signals in L<BUGS> below.) + +To disable this default safe mode, set these values to something higher +than 0. At a level of 1, you get backtraces upon receiving any kind +of warning (this is often annoying) or exception (this is +often valuable). Unfortunately, the debugger cannot discern fatal +exceptions from non-fatal ones. If C<dieLevel> is even 1, then your +non-fatal exceptions are also traced and unceremoniously altered if they +came from C<eval'd> strings or from any kind of C<eval> within modules +you're attempting to load. If C<dieLevel> is 2, the debugger doesn't +care where they came from: It usurps your exception handler and prints +out a trace, then modifies all exceptions with its own embellishments. +This may perhaps be useful for some tracing purposes, but tends to hopelessly +destroy any program that takes its exception handling seriously. =item C<AutoTrace> @@ -339,7 +514,10 @@ C<PERLDB_OPTS>). =item C<LineInfo> File or pipe to print line number info to. If it is a pipe (say, -C<|visual_perl_db>), then a short, "emacs like" message is used. +C<|visual_perl_db>), then a short message is used. This is the +mechanism used to interact with a slave editor or visual debugger, +such as the special C<vi> or C<emacs> hooks, or the C<ddd> graphical +debugger. 
=item C<inhibit_exit> @@ -347,29 +525,32 @@ If 0, allows I<stepping off> the end of the script. =item C<PrintRet> -affects printing of return value after C<r> command. +Print return value after C<r> command if set (default). =item C<ornaments> -affects screen appearance of the command line (see L<Term::ReadLine>). +Affects screen appearance of the command line (see L<Term::ReadLine>). +There is currently no way to disable these, which can render +some output illegible on some displays, or with some pagers. +This is considered a bug. =item C<frame> -affects printing messages on entry and exit from subroutines. If +Affects the printing of messages upon entry and exit from subroutines. If C<frame & 2> is false, messages are printed on entry only. (Printing -on exit may be useful if inter(di)spersed with other messages.) +on exit might be useful if interspersed with other messages.) -If C<frame & 4>, arguments to functions are printed as well as the -context and caller info. If C<frame & 8>, overloaded C<stringify> and -C<tie>d C<FETCH> are enabled on the printed arguments. If C<frame & -16>, the return value from the subroutine is printed as well. +If C<frame & 4>, arguments to functions are printed, plus context +and caller info. If C<frame & 8>, overloaded C<stringify> and +C<tie>d C<FETCH> is enabled on the printed arguments. If C<frame +& 16>, the return value from the subroutine is printed. The length at which the argument list is truncated is governed by the next option: =item C<maxTraceLen> -length at which the argument list is truncated when C<frame> option's +Length to truncate the argument list when the C<frame> option's bit 4 is set. =back @@ -385,7 +566,7 @@ Print only first N elements ('' for all). =item C<compactDump>, C<veryCompact> -Change style of array and hash dump. If C<compactDump>, short array +Change the style of array and hash output. If C<compactDump>, short array may be printed on one line. 
=item C<globPrint> @@ -406,29 +587,30 @@ Dump contents of "reused" addresses. =item C<quote>, C<HighBit>, C<undefPrint> -Change style of string dump. Default value of C<quote> is C<auto>, one -can enable either double-quotish dump, or single-quotish by setting it -to C<"> or C<'>. By default, characters with high bit set are printed -I<as is>. +Change the style of string dump. The default value for C<quote> +is C<auto>; one can enable double-quotish or single-quotish format +by setting it to C<"> or C<'>, respectively. By default, characters +with their high bit set are printed verbatim. =item C<UsageOnly> -I<very> rudimentally per-package memory usage dump. Calculates total -size of strings in variables in the package. +Rudimentary per-package memory usage dump. Calculates total +size of strings found in variables in the package. This does not +include lexicals in a module's file scope, or lost in closures. =back -During startup options are initialized from C<$ENV{PERLDB_OPTS}>. -You can put additional initialization options C<TTY>, C<noTTY>, +During startup, options are initialized from C<$ENV{PERLDB_OPTS}>. +You may place the initialization options C<TTY>, C<noTTY>, C<ReadLine>, and C<NonStop> there. -Example rc file: +If your rc file contains: - &parse_options("NonStop=1 LineInfo=db.out AutoTrace"); + parse_options("NonStop=1 LineInfo=db.out AutoTrace"); -The script will run without human intervention, putting trace information -into the file I<db.out>. (If you interrupt it, you would better reset -C<LineInfo> to something "interactive"!) +then your script will run without human intervention, putting trace +information into the file I<db.out>. (If you interrupt it, you'd +better reset C<LineInfo> to F</dev/tty> if you expect to see anything.) =over 12 @@ -438,173 +620,65 @@ The TTY to use for debugging I/O. =item C<noTTY> -If set, goes in C<NonStop> mode, and would not connect to a TTY. 
If -interrupt (or if control goes to debugger via explicit setting of -$DB::signal or $DB::single from the Perl script), connects to a TTY -specified by the C<TTY> option at startup, or to a TTY found at -runtime using C<Term::Rendezvous> module of your choice. +If set, the debugger goes into C<NonStop> mode and will not connect to a TTY. If +interrupted (or if control goes to the debugger via explicit setting of +$DB::signal or $DB::single from the Perl script), it connects to a TTY +specified in the C<TTY> option at startup, or to a tty found at +runtime using the C<Term::Rendezvous> module of your choice. -This module should implement a method C<new> which returns an object -with two methods: C<IN> and C<OUT>, returning two filehandles to use -for debugging input and output correspondingly. Method C<new> may -inspect an argument which is a value of C<$ENV{PERLDB_NOTTY}> at -startup, or is C<"/tmp/perldbtty$$"> otherwise. +This module should implement a method named C<new> that returns an object +with two methods: C<IN> and C<OUT>. These should return filehandles to use +for debugging input and output correspondingly. The C<new> method should +inspect an argument containing the value of C<$ENV{PERLDB_NOTTY}> at +startup, or C<"/tmp/perldbtty$$"> otherwise. This file is not +inspected for proper ownership, so security hazards are theoretically +possible. =item C<ReadLine> -If false, readline support in debugger is disabled, so you can debug -ReadLine applications. +If false, readline support in the debugger is disabled in order +to debug applications that themselves use ReadLine. =item C<NonStop> -If set, debugger goes into noninteractive mode until interrupted, or +If set, the debugger goes into non-interactive mode until interrupted, or programmatically by setting $DB::signal or $DB::single. 
=back Here's an example of using the C<$ENV{PERLDB_OPTS}> variable: - $ PERLDB_OPTS="N f=2" perl -d myprogram + $ PERLDB_OPTS="NonStop frame=2" perl -d myprogram -will run the script C<myprogram> without human intervention, printing -out the call tree with entry and exit points. Note that C<N f=2> is -equivalent to C<NonStop=1 frame=2>. Note also that at the moment when -this documentation was written all the options to the debugger could -be uniquely abbreviated by the first letter (with exception of -C<Dump*> options). +That will run the script B<myprogram> without human intervention, +printing out the call tree with entry and exit points. Note that +C<NonStop=1 frame=2> is equivalent to C<N f=2>, and that originally, +options could be uniquely abbreviated by the first letter (modulo +the C<Dump*> options). It is nevertheless recommended that you +always spell them out in full for legibility and future compatibility. -Other examples may include +Other examples include - $ PERLDB_OPTS="N f A L=listing" perl -d myprogram + $ PERLDB_OPTS="NonStop frame=2" perl -d myprogram -- runs script noninteractively, printing info on each entry into a -subroutine and each executed line into the file F<listing>. (If you -interrupt it, you would better reset C<LineInfo> to something +which runs script non-interactively, printing info on each entry +into a subroutine and each executed line into the file named F<listing>. +(If you interrupt it, you would better reset C<LineInfo> to something "interactive"!) +Other examples include (using standard shell syntax to show environment +variable settings): - $ env "PERLDB_OPTS=R=0 TTY=/dev/ttyc" perl -d myprogram + $ ( PERLDB_OPTS="NonStop frame=1 AutoTrace LineInfo=tperl.out" + perl -d myprogram ) -may be useful for debugging a program which uses C<Term::ReadLine> -itself. 
Do not forget detach shell from the TTY in the window which -corresponds to F</dev/ttyc>, say, by issuing a command like +which may be useful for debugging a program that uses C<Term::ReadLine> +itself. Do not forget to detach your shell from the TTY in the window that +corresponds to F</dev/ttyXX>, say, by issuing a command like $ sleep 1000000 -See L<"Debugger Internals"> below for more details. - -=item E<lt> [ command ] - -Set an action (Perl command) to happen before every debugger prompt. -A multi-line command may be entered by backslashing the newlines. If -C<command> is missing, resets the list of actions. - -=item E<lt>E<lt> command - -Add an action (Perl command) to happen before every debugger prompt. -A multi-line command may be entered by backslashing the newlines. - -=item E<gt> command - -Set an action (Perl command) to happen after the prompt when you've -just given a command to return to executing the script. A multi-line -command may be entered by backslashing the newlines. If C<command> is -missing, resets the list of actions. - -=item E<gt>E<gt> command - -Adds an action (Perl command) to happen after the prompt when you've -just given a command to return to executing the script. A multi-line -command may be entered by backslashing the newlines. - -=item { [ command ] - -Set an action (debugger command) to happen before every debugger prompt. -A multi-line command may be entered by backslashing the newlines. If -C<command> is missing, resets the list of actions. - -=item {{ command - -Add an action (debugger command) to happen before every debugger prompt. -A multi-line command may be entered by backslashing the newlines. - -=item ! number - -Redo a previous command (default previous command). - -=item ! -number - -Redo number'th-to-last command. - -=item ! pattern - -Redo last command that started with pattern. -See C<O recallCommand>, too. - -=item !! 
cmd - -Run cmd in a subprocess (reads from DB::IN, writes to DB::OUT) -See C<O shellBang> too. - -=item H -number - -Display last n commands. Only commands longer than one character are -listed. If number is omitted, lists them all. - -=item q or ^D - -Quit. ("quit" doesn't work for this.) This is the only supported way -to exit the debugger, though typing C<exit> twice may do it too. - -Set an C<O>ption C<inhibit_exit> to 0 if you want to be able to I<step -off> the end the script. You may also need to set C<$finished> to 0 at -some moment if you want to step through global destruction. - -=item R - -Restart the debugger by B<exec>ing a new session. It tries to maintain -your history across this, but internal settings and command line options -may be lost. - -Currently the following setting are preserved: history, breakpoints, -actions, debugger C<O>ptions, and the following command line -options: B<-w>, B<-I>, and B<-e>. - -=item |dbcmd - -Run debugger command, piping DB::OUT to current pager. - -=item ||dbcmd - -Same as C<|dbcmd> but DB::OUT is temporarily B<select>ed as well. -Often used with commands that would otherwise produce long -output, such as - - |V main - -=item = [alias value] - -Define a command alias, like - - = quit q - -or list current aliases. - -=item command - -Execute command as a Perl statement. A missing semicolon will be -supplied. - -=item m expr - -The expression is evaluated, and the methods which may be applied to -the result are listed. - -=item m package - -The methods which may be applied to objects in the C<package> are listed. - -=back +See L<perldebguts/"Debugger Internals"> for details. =head2 Debugger input/output @@ -620,19 +694,20 @@ or even DB<<17>> -where that number is the command number, which you'd use to access with -the builtin B<csh>-like history mechanism, e.g., C<!17> would repeat -command number 17. The number of angle brackets indicates the depth of -the debugger. 
You could get more than one set of brackets, for example, if -you'd already at a breakpoint and then printed out the result of a -function call that itself also has a breakpoint, or you step into an -expression via C<s/n/t expression> command. +where that number is the command number, and which you'd use to +access it with the built-in B<csh>-like history mechanism. For example, +C<!17> would repeat command number 17. The depth of the angle +brackets indicates the nesting depth of the debugger. You could +get more than one set of brackets, for example, if you were already +at a breakpoint and then printed the result of a function call that +itself has a breakpoint, or you step into an expression via C<s/n/t +expression> command. =item Multiline commands If you want to enter a multi-line command, such as a subroutine -definition with several statements, or a format, you may escape the -newline that would normally end the debugger command with a backslash. +definition with several statements or a format, escape the newline +that would normally end the debugger command with a backslash. Here's an example: DB<1> for (1..4) { \ @@ -655,24 +730,26 @@ look like: @ = Ambulation::legs(1, 2, 3, 4) called from file `camel_flea' line 7 $ = main::pests('bactrian', 4) called from file `camel_flea' line 4 -The left-hand character up there tells whether the function was called -in a scalar or list context (we bet you can tell which is which). What -that says is that you were in the function C<main::infested> when you ran -the stack dump, and that it was called in a scalar context from line 10 -of the file I<Ambulation.pm>, but without any arguments at all, meaning -it was called as C<&infested>. The next stack frame shows that the -function C<Ambulation::legs> was called in a list context from the -I<camel_flea> file with four arguments. The last stack frame shows that -C<main::pests> was called in a scalar context, also from I<camel_flea>, -but from line 4. 
+The left-hand character up there indicates the context in which the +function was called, with C<$> and C<@> meaning scalar or list +contexts respectively, and C<.> meaning void context (which is +actually a sort of scalar context). The display above says +that you were in the function C<main::infested> when you ran the +stack dump, and that it was called in scalar context from line +10 of the file I<Ambulation.pm>, but without any arguments at all, +meaning it was called as C<&infested>. The next stack frame shows +that the function C<Ambulation::legs> was called in list context +from the I<camel_flea> file with four arguments. The last stack +frame shows that C<main::pests> was called in scalar context, +also from I<camel_flea>, but from line 4. -Note that if you execute C<T> command from inside an active C<use> -statement, the backtrace will contain both C<require> -frame and an C<eval>) frame. +If you execute the C<T> command from inside an active C<use> +statement, the backtrace will contain both a C<require> frame and +an C<eval> frame. -=item Listing +=item Line Listing Format -Listing given via different flavors of C<l> command looks like this: +This shows the sorts of output the C<l> command can produce: DB<<13>> l 101: @i{@i} = (); @@ -686,976 +763,185 @@ Listing given via different flavors of C<l> command looks like this: 109:a if ($extra-- > 0) { 110: %isa = ($pack,1); -Note that the breakable lines are marked with C<:>, lines with -breakpoints are marked by C<b>, with actions by C<a>, and the -next executed line is marked by C<==E<gt>>. +Breakable lines are marked with C<:>. Lines with breakpoints are +marked by C<b> and those with actions by C<a>. The line that's +about to be executed is marked by C<< ==> >>. =item Frame listing -When C<frame> option is set, debugger would print entered (and -optionally exited) subroutines in different styles. 
- -What follows is the start of the listing of - - env "PERLDB_OPTS=f=n N" perl -d -V - -for different values of C<n>: - -=over 4 - -=item 1 - - entering main::BEGIN - entering Config::BEGIN - Package lib/Exporter.pm. - Package lib/Carp.pm. - Package lib/Config.pm. - entering Config::TIEHASH - entering Exporter::import - entering Exporter::export - entering Config::myconfig - entering Config::FETCH - entering Config::FETCH - entering Config::FETCH - entering Config::FETCH - -=item 2 - - entering main::BEGIN - entering Config::BEGIN - Package lib/Exporter.pm. - Package lib/Carp.pm. - exited Config::BEGIN - Package lib/Config.pm. - entering Config::TIEHASH - exited Config::TIEHASH - entering Exporter::import - entering Exporter::export - exited Exporter::export - exited Exporter::import - exited main::BEGIN - entering Config::myconfig - entering Config::FETCH - exited Config::FETCH - entering Config::FETCH - exited Config::FETCH - entering Config::FETCH - -=item 4 - - in $=main::BEGIN() from /dev/nul:0 - in $=Config::BEGIN() from lib/Config.pm:2 - Package lib/Exporter.pm. - Package lib/Carp.pm. - Package lib/Config.pm. - in $=Config::TIEHASH('Config') from lib/Config.pm:644 - in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 - in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from li - in @=Config::myconfig() from /dev/nul:0 - in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'SUBVERSION') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'osname') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'osvers') from lib/Config.pm:574 - -=item 6 - - in $=main::BEGIN() from /dev/nul:0 - in $=Config::BEGIN() from lib/Config.pm:2 - Package lib/Exporter.pm. - Package lib/Carp.pm. 
- out $=Config::BEGIN() from lib/Config.pm:0 - Package lib/Config.pm. - in $=Config::TIEHASH('Config') from lib/Config.pm:644 - out $=Config::TIEHASH('Config') from lib/Config.pm:644 - in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 - in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ - out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/ - out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 - out $=main::BEGIN() from /dev/nul:0 - in @=Config::myconfig() from /dev/nul:0 - in $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 - out $=Config::FETCH(ref(Config), 'package') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 - out $=Config::FETCH(ref(Config), 'baserev') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 - out $=Config::FETCH(ref(Config), 'PATCHLEVEL') from lib/Config.pm:574 - in $=Config::FETCH(ref(Config), 'SUBVERSION') from lib/Config.pm:574 - -=item 14 - - in $=main::BEGIN() from /dev/nul:0 - in $=Config::BEGIN() from lib/Config.pm:2 - Package lib/Exporter.pm. - Package lib/Carp.pm. - out $=Config::BEGIN() from lib/Config.pm:0 - Package lib/Config.pm. 
- in $=Config::TIEHASH('Config') from lib/Config.pm:644 - out $=Config::TIEHASH('Config') from lib/Config.pm:644 - in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 - in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E - out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/E - out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/nul:0 - out $=main::BEGIN() from /dev/nul:0 - in @=Config::myconfig() from /dev/nul:0 - in $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 - out $=Config::FETCH('Config=HASH(0x1aa444)', 'package') from lib/Config.pm:574 - in $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 - out $=Config::FETCH('Config=HASH(0x1aa444)', 'baserev') from lib/Config.pm:574 - -=item 30 - - in $=CODE(0x15eca4)() from /dev/null:0 - in $=CODE(0x182528)() from lib/Config.pm:2 - Package lib/Exporter.pm. - out $=CODE(0x182528)() from lib/Config.pm:0 - scalar context return from CODE(0x182528): undef - Package lib/Config.pm. 
- in $=Config::TIEHASH('Config') from lib/Config.pm:628 - out $=Config::TIEHASH('Config') from lib/Config.pm:628 - scalar context return from Config::TIEHASH: empty hash - in $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 - in $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 - out $=Exporter::export('Config', 'main', 'myconfig', 'config_vars') from lib/Exporter.pm:171 - scalar context return from Exporter::export: '' - out $=Exporter::import('Config', 'myconfig', 'config_vars') from /dev/null:0 - scalar context return from Exporter::import: '' - - -=back - -In all the cases indentation of lines shows the call tree, if bit 2 of -C<frame> is set, then a line is printed on exit from a subroutine as -well, if bit 4 is set, then the arguments are printed as well as the -caller info, if bit 8 is set, the arguments are printed even if they -are tied or references, if bit 16 is set, the return value is printed -as well. - -When a package is compiled, a line like this - - Package lib/Carp.pm. - -is printed with proper indentation. +When the C<frame> option is set, the debugger would print entered (and +optionally exited) subroutines in different styles. See L<perldebguts> +for incredibly long examples of these. =back =head2 Debugging compile-time statements -If you have any compile-time executable statements (code within a BEGIN -block or a C<use> statement), these will C<NOT> be stopped by debugger, -although C<require>s will (and compile-time statements can be traced -with C<AutoTrace> option set in C<PERLDB_OPTS>). From your own Perl -code, however, you can +If you have compile-time executable statements (such as code within +BEGIN and CHECK blocks or C<use> statements), these will I<not> be +stopped by debugger, although C<require>s and INIT blocks will, and +compile-time statements can be traced with C<AutoTrace> option set +in C<PERLDB_OPTS>. 
From your own Perl code, however, you can transfer control back to the debugger using the following statement, which is harmless if the debugger is not running: $DB::single = 1; -If you set C<$DB::single> to the value 2, it's equivalent to having +If you set C<$DB::single> to 2, it's equivalent to having just typed the C<n> command, whereas a value of 1 means the C<s> command. The C<$DB::trace> variable should be set to 1 to simulate having typed the C<t> command. -Another way to debug compile-time code is to start debugger, set a -breakpoint on I<load> of some module thusly +Another way to debug compile-time code is to start the debugger, set a +breakpoint on the I<load> of some module: DB<7> b load f:/perllib/lib/Carp.pm Will stop on load of `f:/perllib/lib/Carp.pm'. -and restart debugger by C<R> command (if possible). One can use C<b +and then restart the debugger using the C<R> command (if possible). One can use C<b compile subname> for the same purpose. =head2 Debugger Customization -Most probably you do not want to modify the debugger, it contains enough -hooks to satisfy most needs. You may change the behaviour of debugger -from the debugger itself, using C<O>ptions, from the command line via -C<PERLDB_OPTS> environment variable, and from I<customization files>. +The debugger probably contains enough configuration hooks that you +won't ever have to modify it yourself. You may change the behaviour +of debugger from within the debugger using its C<O> command, from +the command line via the C<PERLDB_OPTS> environment variable, and +from customization files. -You can do some customization by setting up a F<.perldb> file which +You can do some customization by setting up a F<.perldb> file, which contains initialization code. 
For instance, you could make aliases like these (the last one is one people expect to be there): $DB::alias{'len'} = 's/^len(.*)/p length($1)/'; $DB::alias{'stop'} = 's/^stop (at|in)/b/'; $DB::alias{'ps'} = 's/^ps\b/p scalar /'; - $DB::alias{'quit'} = 's/^quit(\s*)/exit\$/'; + $DB::alias{'quit'} = 's/^quit(\s*)/exit/'; -One changes options from F<.perldb> file via calls like this one; +You can change options from F<.perldb> by using calls like this one; parse_options("NonStop=1 LineInfo=db.out AutoTrace=1 frame=2"); -(the code is executed in the package C<DB>). Note that F<.perldb> is +The code is executed in the package C<DB>. Note that F<.perldb> is processed before processing C<PERLDB_OPTS>. If F<.perldb> defines the -subroutine C<afterinit>, it is called after all the debugger +subroutine C<afterinit>, that function is called after debugger initialization ends. F<.perldb> may be contained in the current -directory, or in the C<LOGDIR>/C<HOME> directory. +directory, or in the home directory. Because this file is sourced +in by Perl and may contain arbitrary commands, for security reasons, +it must be owned by the superuser or the current user, and writable +by no one but its owner. -If you want to modify the debugger, copy F<perl5db.pl> from the Perl -library to another name and modify it as necessary. You'll also want -to set your C<PERL5DB> environment variable to say something like this: +If you want to modify the debugger, copy F<perl5db.pl> from the +Perl library to another name and hack it to your heart's content. +You'll then want to set your C<PERL5DB> environment variable to say +something like this: BEGIN { require "myperl5db.pl" } -As the last resort, one can use C<PERL5DB> to customize debugger by -directly setting internal variables or calling debugger functions. +As a last resort, you could also use C<PERL5DB> to customize the debugger +by directly setting internal variables or calling debugger functions. 
+ +Note that any variables and functions that are not documented in +this document (or in L<perldebguts>) are considered for internal +use only, and as such are subject to change without notice. =head2 Readline Support -As shipped, the only command line history supplied is a simplistic one +As shipped, the only command-line history supplied is a simplistic one that checks for leading exclamation points. However, if you install the Term::ReadKey and Term::ReadLine modules from CPAN, you will have full editing capabilities much like GNU I<readline>(3) provides. Look for these in the F<modules/by-module/Term> directory on CPAN. +These do not support normal B<vi> command-line editing, however. -A rudimentary command line completion is also available. +A rudimentary command-line completion is also available. Unfortunately, the names of lexical variables are not available for completion. =head2 Editor Support for Debugging -If you have GNU B<emacs> installed on your system, it can interact with -the Perl debugger to provide an integrated software development -environment reminiscent of its interactions with C debuggers. - -Perl is also delivered with a start file for making B<emacs> act like a -syntax-directed editor that understands (some of) Perl's syntax. Look in -the I<emacs> directory of the Perl source distribution. - -(Historically, a similar setup for interacting with B<vi> and the -X11 window system had also been available, but at the time of this -writing, no debugger support for B<vi> currently exists.) - -=head2 The Perl Profiler - -If you wish to supply an alternative debugger for Perl to run, just -invoke your script with a colon and a package argument given to the B<-d> -flag. One of the most popular alternative debuggers for Perl is -B<DProf>, the Perl profiler. As of this writing, B<DProf> is not -included with the standard Perl distribution, but it is expected to -be included soon, for certain values of "soon". 
- -Meanwhile, you can fetch the Devel::Dprof module from CPAN. Assuming -it's properly installed on your system, to profile your Perl program in -the file F<mycode.pl>, just type: - - perl -d:DProf mycode.pl - -When the script terminates the profiler will dump the profile information -to a file called F<tmon.out>. A tool like B<dprofpp> (also supplied with -the Devel::DProf package) can be used to interpret the information which is -in that profile. - -=head2 Debugger support in perl - -When you call the B<caller> function (see L<perlfunc/caller>) from the -package DB, Perl sets the array @DB::args to contain the arguments the -corresponding stack frame was called with. - -If perl is run with B<-d> option, the following additional features -are enabled (cf. L<perlvar/$^P>): - -=over - -=item * - -Perl inserts the contents of C<$ENV{PERL5DB}> (or C<BEGIN {require -'perl5db.pl'}> if not present) before the first line of the -application. - -=item * - -The array C<@{"_E<lt>$filename"}> is the line-by-line contents of -$filename for all the compiled files. Same for C<eval>ed strings which -contain subroutines, or which are currently executed. The C<$filename> -for C<eval>ed strings looks like C<(eval 34)>. - -=item * - -The hash C<%{"_E<lt>$filename"}> contains breakpoints and action (it is -keyed by line number), and individual entries are settable (as opposed -to the whole hash). Only true/false is important to Perl, though the -values used by F<perl5db.pl> have the form -C<"$break_condition\0$action">. Values are magical in numeric context: -they are zeros if the line is not breakable. - -Same for evaluated strings which contain subroutines, or which are -currently executed. The $filename for C<eval>ed strings looks like -C<(eval 34)>. - -=item * - -The scalar C<${"_E<lt>$filename"}> contains C<"_E<lt>$filename">. Same for -evaluated strings which contain subroutines, or which are currently -executed. The $filename for C<eval>ed strings looks like C<(eval -34)>. 
- -=item * - -After each C<require>d file is compiled, but before it is executed, -C<DB::postponed(*{"_E<lt>$filename"})> is called (if subroutine -C<DB::postponed> exists). Here the $filename is the expanded name of -the C<require>d file (as found in values of %INC). - -=item * - -After each subroutine C<subname> is compiled existence of -C<$DB::postponed{subname}> is checked. If this key exists, -C<DB::postponed(subname)> is called (if subroutine C<DB::postponed> -exists). - -=item * - -A hash C<%DB::sub> is maintained, with keys being subroutine names, -values having the form C<filename:startline-endline>. C<filename> has -the form C<(eval 31)> for subroutines defined inside C<eval>s. - -=item * - -When execution of the application reaches a place that can have -a breakpoint, a call to C<DB::DB()> is performed if any one of -variables $DB::trace, $DB::single, or $DB::signal is true. (Note that -these variables are not C<local>izable.) This feature is disabled when -the control is inside C<DB::DB()> or functions called from it (unless -C<$^D & (1E<lt>E<lt>30)>). - -=item * - -When execution of the application reaches a subroutine call, a call -to C<&DB::sub>(I<args>) is performed instead, with C<$DB::sub> being -the name of the called subroutine. (Unless the subroutine is compiled -in the package C<DB>.) - -=back - -Note that if C<&DB::sub> needs some external data to be setup for it -to work, no subroutine call is possible until this is done. For the -standard debugger C<$DB::deep> (how many levels of recursion deep into -the debugger you can go before a mandatory break) gives an example of -such a dependency. 
- -The minimal working debugger consists of one line - - sub DB::DB {} - -which is quite handy as contents of C<PERL5DB> environment -variable: - - env "PERL5DB=sub DB::DB {}" perl -d your-script - -Another (a little bit more useful) minimal debugger can be created -with the only line being - - sub DB::DB {print ++$i; scalar <STDIN>} - -This debugger would print the sequential number of encountered -statement, and would wait for your C<CR> to continue. - -The following debugger is quite functional: - - { - package DB; - sub DB {} - sub sub {print ++$i, " $sub\n"; &$sub} - } - -It prints the sequential number of subroutine call and the name of the -called subroutine. Note that C<&DB::sub> should be compiled into the -package C<DB>. - -=head2 Debugger Internals - -At the start, the debugger reads your rc file (F<./.perldb> or -F<~/.perldb> under Unix), which can set important options. This file may -define a subroutine C<&afterinit> to be executed after the debugger is -initialized. - -After the rc file is read, the debugger reads environment variable -PERLDB_OPTS and parses it as a rest of C<O ...> line in debugger prompt. - -It also maintains magical internal variables, such as C<@DB::dbline>, -C<%DB::dbline>, which are aliases for C<@{"::_<current_file"}> -C<%{"::_<current_file"}>. Here C<current_file> is the currently -selected (with the debugger's C<f> command, or by flow of execution) -file. - -Some functions are provided to simplify customization. See L<"Debugger -Customization"> for description of C<DB::parse_options(string)>. The -function C<DB::dump_trace(skip[, count])> skips the specified number -of frames, and returns a list containing info about the caller -frames (all if C<count> is missing). Each entry is a hash with keys -C<context> (C<$> or C<@>), C<sub> (subroutine name, or info about -eval), C<args> (C<undef> or a reference to an array), C<file>, and -C<line>. 
- -The function C<DB::print_trace(FH, skip[, count[, short]])> prints -formatted info about caller frames. The last two functions may be -convenient as arguments to C<E<lt>>, C<E<lt>E<lt>> commands. - -=head2 Other resources - -You did try the B<-w> switch, didn't you? - -=head2 BUGS - -You cannot get the stack frame information or otherwise debug functions -that were not compiled by Perl, such as C or C++ extensions. - -If you alter your @_ arguments in a subroutine (such as with B<shift> -or B<pop>, the stack backtrace will not show the original values. - -=head1 Debugging Perl memory usage - -Perl is I<very> frivolous with memory. There is a saying that to -estimate memory usage of Perl, assume a reasonable algorithm of -allocation, and multiply your estimates by 10. This is not absolutely -true, but may give you a good grasp of what happens. - -Say, an integer cannot take less than 20 bytes of memory, a float -cannot take less than 24 bytes, a string cannot take less than 32 -bytes (all these examples assume 32-bit architectures, the result are -much worse on 64-bit architectures). If a variable is accessed in two -of three different ways (which require an integer, a float, or a -string), the memory footprint may increase by another 20 bytes. A -sloppy malloc() implementation will make these numbers yet more. - -On the opposite end of the scale, a declaration like - - sub foo; - -may take (on some versions of perl) up to 500 bytes of memory. - -Off-the-cuff anecdotal estimates of a code bloat give a factor around -8. This means that the compiled form of reasonable (commented -indented etc.) code will take approximately 8 times more than the -disk space the code takes. - -There are two Perl-specific ways to analyze the memory usage: -$ENV{PERL_DEBUG_MSTATS} and B<-DL> switch. First one is available -only if perl is compiled with Perl's malloc(), the second one only if -Perl compiled with C<-DDEBUGGING> (as with giving C<-D optimise=-g> -option to F<Configure>). 
- -=head2 Using C<$ENV{PERL_DEBUG_MSTATS}> - -If your perl is using Perl's malloc(), and compiled with correct -switches (this is the default), then it will print memory usage -statistics after compiling your code (if C<$ENV{PERL_DEBUG_MSTATS}> > -1), and before termination of the script (if -C<$ENV{PERL_DEBUG_MSTATS}> >= 1). The report format is similar to one -in the following example: - - env PERL_DEBUG_MSTATS=2 perl -e "require Carp" - Memory allocation statistics after compilation: (buckets 4(4)..8188(8192) - 14216 free: 130 117 28 7 9 0 2 2 1 0 0 - 437 61 36 0 5 - 60924 used: 125 137 161 55 7 8 6 16 2 0 1 - 74 109 304 84 20 - Total sbrk(): 77824/21:119. Odd ends: pad+heads+chain+tail: 0+636+0+2048. - Memory allocation statistics after execution: (buckets 4(4)..8188(8192) - 30888 free: 245 78 85 13 6 2 1 3 2 0 1 - 315 162 39 42 11 - 175816 used: 265 176 1112 111 26 22 11 27 2 1 1 - 196 178 1066 798 39 - Total sbrk(): 215040/47:145. Odd ends: pad+heads+chain+tail: 0+2192+0+6144. - -It is possible to ask for such a statistic at arbitrary moment by -using Devel::Peek::mstats() (module Devel::Peek is available on CPAN). - -Here is the explanation of different parts of the format: - -=over - -=item C<buckets SMALLEST(APPROX)..GREATEST(APPROX)> - -Perl's malloc() uses bucketed allocations. Every request is rounded -up to the closest bucket size available, and a bucket of these size is -taken from the pool of the buckets of this size. - -The above line describes limits of buckets currently in use. Each -bucket has two sizes: memory footprint, and the maximal size of user -data which may be put into this bucket. Say, in the above example the -smallest bucket is both sizes 4. The biggest bucket has usable size -8188, and the memory footprint 8192. +If you have the FSF's version of B<emacs> installed on your system, +it can interact with the Perl debugger to provide an integrated +software development environment reminiscent of its interactions +with C debuggers. 
-With debugging Perl some buckets may have negative usable size. This -means that these buckets cannot (and will not) be used. For greater -buckets the memory footprint may be one page greater than a power of -2. In such a case the corresponding power of two is printed instead -in the C<APPROX> field above. +Perl comes with a start file for making B<emacs> act like a +syntax-directed editor that understands (some of) Perl's syntax. +Look in the I<emacs> directory of the Perl source distribution. -=item Free/Used +A similar setup by Tom Christiansen for interacting with any +vendor-shipped B<vi> and the X11 window system is also available. +This works similarly to the integrated multiwindow support that +B<emacs> provides, where the debugger drives the editor. At the +time of this writing, however, that tool's eventual location in the +Perl distribution was uncertain. -The following 1 or 2 rows of numbers correspond to the number of -buckets of each size between C<SMALLEST> and C<GREATEST>. In the -first row the sizes (memory footprints) of buckets are powers of two -(or possibly one page greater). In the second row (if present) the -memory footprints of the buckets are between memory footprints of two -buckets "above". +Users of B<vi> should also look into B<vim> and B<gvim>, the mousey +and windy version, for coloring of Perl keywords. -Say, with the above example the memory footprints are (with current -algorithm) +Note that only perl can truly parse Perl, so all such CASE tools +fall somewhat short of the mark, especially if you don't program +your Perl as a C programmer might. - free: 8 16 32 64 128 256 512 1024 2048 4096 8192 - 4 12 24 48 80 - -With non-C<DEBUGGING> perl the buckets starting from C<128>-long ones -have 4-byte overhead, thus 8192-long bucket may take up to -8188-byte-long allocations. - -=item C<Total sbrk(): SBRKed/SBRKs:CONTINUOUS> - -The first two fields give the total amount of memory perl sbrk()ed, -and number of sbrk()s used. 
The third number is what perl thinks -about continuity of returned chunks. As far as this number is -positive, malloc() will assume that it is probable that sbrk() will -provide continuous memory. - -The amounts sbrk()ed by external libraries is not counted. - -=item C<pad: 0> - -The amount of sbrk()ed memory needed to keep buckets aligned. - -=item C<heads: 2192> - -While memory overhead of bigger buckets is kept inside the bucket, for -smaller buckets it is kept in separate areas. This field gives the -total size of these areas. - -=item C<chain: 0> - -malloc() may want to subdivide a bigger bucket into smaller buckets. -If only a part of the deceased-bucket is left non-subdivided, the rest -is kept as an element of a linked list. This field gives the total -size of these chunks. - -=item C<tail: 6144> - -To minimize amount of sbrk()s malloc() asks for more memory. This -field gives the size of the yet-unused part, which is sbrk()ed, but -never touched. - -=back - -=head2 Example of using B<-DL> switch - -Below we show how to analyse memory usage by - - do 'lib/auto/POSIX/autosplit.ix'; - -The file in question contains a header and 146 lines similar to - - sub getcwd ; - -B<Note:> I<the discussion below supposes 32-bit architecture. In the -newer versions of perl the memory usage of the constructs discussed -here is much improved, but the story discussed below is a real-life -story. This story is very terse, and assumes more than cursory -knowledge of Perl internals.> - -Here is the itemized list of Perl allocations performed during parsing -of this file: - - !!! "after" at test.pl line 3. - Id subtot 4 8 12 16 20 24 28 32 36 40 48 56 64 72 80 80+ - 0 02 13752 . . . . 294 . . . . . . . . . . 4 - 0 54 5545 . . 8 124 16 . . . 1 1 . . . . . 3 - 5 05 32 . . . . . . . 1 . . . . . . . . - 6 02 7152 . . . . . . . . . . 149 . . . . . - 7 02 3600 . . . . . 150 . . . . . . . . . . - 7 03 64 . -1 . 1 . . 2 . . . . . . . . . - 7 04 7056 . . . . . . . . . . . . . . . 
7 - 7 17 38404 . . . . . . . 1 . . 442 149 . . 147 . - 9 03 2078 17 249 32 . . . . 2 . . . . . . . . - - -To see this list insert two C<warn('!...')> statements around the call: - - warn('!'); - do 'lib/auto/POSIX/autosplit.ix'; - warn('!!! "after"'); - -and run it with B<-DL> option. The first warn() will print memory -allocation info before the parsing of the file, and will memorize the -statistics at this point (we ignore what it prints). The second warn() -will print increments w.r.t. this memorized statistics. This is the -above printout. - -Different I<Id>s on the left correspond to different subsystems of -perl interpreter, they are just first argument given to perl memory -allocation API New(). To find what C<9 03> means C<grep> the perl -source for C<903>. You will see that it is F<util.c>, function -savepvn(). This function is used to store a copy of existing chunk of -memory. Using C debugger, one can see that it is called either -directly from gv_init(), or via sv_magic(), and gv_init() is called -from gv_fetchpv() - which is called from newSUB(). - -B<Note:> to reach this place in debugger and skip all the calls to -savepvn during the compilation of the main script, set a C breakpoint -in Perl_warn(), C<continue> this point is reached, I<then> set -breakpoint in Perl_savepvn(). Note that you may need to skip a -handful of Perl_savepvn() which do not correspond to mass production -of CVs (there are more C<903> allocations than 146 similar lines of -F<lib/auto/POSIX/autosplit.ix>). Note also that C<Perl_> prefixes are -added by macroization code in perl header files to avoid conflicts -with external libraries. - -Anyway, we see that C<903> ids correspond to creation of globs, twice -per glob - for glob name, and glob stringification magic. - -Here are explanations for other I<Id>s above: - -=over - -=item C<717> - -is for creation of bigger C<XPV*> structures. 
In the above case it -creates 3 C<AV> per subroutine, one for a list of lexical variable -names, one for a scratchpad (which contains lexical variables and -C<targets>), and one for the array of scratchpads needed for -recursion. - -It also creates a C<GV> and a C<CV> per subroutine (all called from -start_subparse()). - -=item C<002> - -Creates C array corresponding to the C<AV> of scratchpads, and the -scratchpad itself (the first fake entry of this scratchpad is created -though the subroutine itself is not defined yet). - -It also creates C arrays to keep data for the stash (this is one HV, -but it grows, thus there are 4 big allocations: the big chunks are not -freed, but are kept as additional arenas for C<SV> allocations). - -=item C<054> - -creates a C<HEK> for the name of the glob for the subroutine (this -name is a key in a I<stash>). - -Big allocations with this I<Id> correspond to allocations of new -arenas to keep C<HE>. - -=item C<602> - -creates a C<GP> for the glob for the subroutine. - -=item C<702> - -creates the C<MAGIC> for the glob for the subroutine. - -=item C<704> - -creates I<arenas> which keep SVs. - -=back - -=head2 B<-DL> details - -If Perl is run with B<-DL> option, then warn()s which start with `!' -behave specially. They print a list of I<categories> of memory -allocations, and statistics of allocations of different sizes for -these categories. - -If warn() string starts with - -=over - -=item C<!!!> - -print changed categories only, print the differences in counts of allocations; - -=item C<!!> - -print grown categories only; print the absolute values of counts, and totals; - -=item C<!> - -print nonempty categories, print the absolute values of counts and totals. +=head2 The Perl Profiler -=back +If you wish to supply an alternative debugger for Perl to run, just +invoke your script with a colon and a package argument given to the +B<-d> flag. One of the most popular alternative debuggers for Perl is the +Perl profiler.
Devel::DProf is now included with the standard Perl +distribution. To profile your Perl program in the file F<mycode.pl>, +just type: -=head2 Limitations of B<-DL> statistic + $ perl -d:DProf mycode.pl -If an extension or an external library does not use Perl API to -allocate memory, these allocations are not counted. +When the script terminates the profiler will dump the profile +information to a file called F<tmon.out>. A tool like B<dprofpp>, +also supplied with the standard Perl distribution, can be used to +interpret the information in that profile. =head1 Debugging regular expressions -There are two ways to enable debugging output for regular expressions. - -If your perl is compiled with C<-DDEBUGGING>, you may use the -B<-Dr> flag on the command line. - -Otherwise, one can C<use re 'debug'>, which has effects both at -compile time, and at run time (and is I<not> lexically scoped). - -=head2 Compile-time output - -The debugging output for the compile time looks like this: - - compiling RE `[bc]d(ef*g)+h[ij]k$' - size 43 first at 1 - 1: ANYOF(11) - 11: EXACT <d>(13) - 13: CURLYX {1,32767}(27) - 15: OPEN1(17) - 17: EXACT <e>(19) - 19: STAR(22) - 20: EXACT <f>(0) - 22: EXACT <g>(24) - 24: CLOSE1(26) - 26: WHILEM(0) - 27: NOTHING(28) - 28: EXACT <h>(30) - 30: ANYOF(40) - 40: EXACT <k>(42) - 42: EOL(43) - 43: END(0) - anchored `de' at 1 floating `gh' at 3..2147483647 (checking floating) - stclass `ANYOF' minlen 7 +C<use re 'debug'> enables you to see the gory details of how the +Perl regular expression engine works. In order to understand this +typically voluminous output, one must not only have some idea +about how regular expression matching works in general, but also +know how Perl's regular expressions are internally compiled into +an automaton. These matters are explored in some detail in +L<perldebguts/"Debugging regular expressions">.
-The first line shows the pre-compiled form of the regexp, and the -second shows the size of the compiled form (in arbitrary units, -usually 4-byte words) and the label I<id> of the first node which -does a match. +=head1 Debugging memory usage -The last line (split into two lines in the above) contains the optimizer -info. In the example shown, the optimizer found that the match -should contain a substring C<de> at the offset 1, and substring C<gh> -at some offset between 3 and infinity. Moreover, when checking for -these substrings (to abandon impossible matches quickly) it will check -for the substring C<gh> before checking for the substring C<de>. The -optimizer may also use the knowledge that the match starts (at the -C<first> I<id>) with a character class, and the match cannot be -shorter than 7 chars. +Perl contains internal support for reporting its own memory usage, +but this is a fairly advanced concept that requires some understanding +of how memory allocation works. +See L<perldebguts/"Debugging Perl memory usage"> for the details. -The fields of interest which may appear in the last line are +=head1 SEE ALSO -=over - -=item C<anchored> I<STRING> C<at> I<POS> - -=item C<floating> I<STRING> C<at> I<POS1..POS2> - -see above; - -=item C<matching floating/anchored> - -which substring to check first; - -=item C<minlen> - -the minimal length of the match; - -=item C<stclass> I<TYPE> - -The type of the first matching node. - -=item C<noscan> - -which advises to not scan for the found substrings; - -=item C<isall> - -which says that the optimizer info is in fact all that the regular -expression contains (thus one does not need to enter the RE engine at -all); - -=item C<GPOS> - -if the pattern contains C<\G>; - -=item C<plus> - -if the pattern starts with a repeated char (as in C<x+y>); - -=item C<implicit> - -if the pattern starts with C<.*>; +You did try the B<-w> switch, didn't you? 
-=item C<with eval> +L<perldebguts>, +L<re>, +L<DB>, +L<Devel::DProf>, +L<dprofpp>, +L<Dumpvalue>, +and +L<perlrun>. -if the pattern contain eval-groups (see L<perlre/(?{ code })>); +=head1 BUGS -=item C<anchored(TYPE)> +You cannot get stack frame information or in any fashion debug functions +that were not compiled by Perl, such as those from C or C++ extensions. -if the pattern may -match only at a handful of places (with C<TYPE> being -C<BOL>, C<MBOL>, or C<GPOS>, see the table below). +If you alter your @_ arguments in a subroutine (such as with C<shift> +or C<pop>), the stack backtrace will not show the original values. -=back +The debugger does not currently work in conjunction with the B<-W> +command-line switch, because it itself is not free of warnings. -If a substring is known to match at end-of-line only, it may be -followed by C<$>, as in C<floating `k'$>. - -The optimizer-specific info is used to avoid entering (a slow) RE -engine on strings which will definitely not match. If C<isall> flag -is set, a call to the RE engine may be avoided even when optimizer -found an appropriate place for the match. - -The rest of the output contains the list of I<nodes> of the compiled -form of the RE. Each line has format - -C< >I<id>: I<TYPE> I<OPTIONAL-INFO> (I<next-id>) - -=head2 Types of nodes - -Here is the list of possible types with short descriptions: - - # TYPE arg-description [num-args] [longjump-len] DESCRIPTION - - # Exit points - END no End of program. - SUCCEED no Return from a subroutine, basically. - - # Anchors: - BOL no Match "" at beginning of line. - MBOL no Same, assuming multiline. - SBOL no Same, assuming singleline. - EOS no Match "" at end of string. - EOL no Match "" at end of line. - MEOL no Same, assuming multiline. - SEOL no Same, assuming singleline.
- BOUND no Match "" at any word boundary - BOUNDL no Match "" at any word boundary - NBOUND no Match "" at any word non-boundary - NBOUNDL no Match "" at any word non-boundary - GPOS no Matches where last m//g left off. - - # [Special] alternatives - ANY no Match any one character (except newline). - SANY no Match any one character. - ANYOF sv Match character in (or not in) this class. - ALNUM no Match any alphanumeric character - ALNUML no Match any alphanumeric char in locale - NALNUM no Match any non-alphanumeric character - NALNUML no Match any non-alphanumeric char in locale - SPACE no Match any whitespace character - SPACEL no Match any whitespace char in locale - NSPACE no Match any non-whitespace character - NSPACEL no Match any non-whitespace char in locale - DIGIT no Match any numeric character - NDIGIT no Match any non-numeric character - - # BRANCH The set of branches constituting a single choice are hooked - # together with their "next" pointers, since precedence prevents - # anything being concatenated to any individual branch. The - # "next" pointer of the last BRANCH in a choice points to the - # thing following the whole choice. This is also where the - # final "next" pointer of each individual branch points; each - # branch starts with the operand node of a BRANCH node. - # - BRANCH node Match this alternative, or the next... - - # BACK Normal "next" pointers all implicitly point forward; BACK - # exists to make loop structures possible. - # not used - BACK no Match "", "next" ptr points backward. - - # Literals - EXACT sv Match this string (preceded by length). - EXACTF sv Match this string, folded (prec. by length). - EXACTFL sv Match this string, folded in locale (w/len). - - # Do nothing - NOTHING no Match empty string. - # A variant of above which delimits a group, thus stops optimizations - TAIL no Match empty string. Can jump here from outside. 
- - # STAR,PLUS '?', and complex '*' and '+', are implemented as circular - # BRANCH structures using BACK. Simple cases (one character - # per match) are implemented with STAR and PLUS for speed - # and to minimize recursive plunges. - # - STAR node Match this (simple) thing 0 or more times. - PLUS node Match this (simple) thing 1 or more times. - - CURLY sv 2 Match this simple thing {n,m} times. - CURLYN no 2 Match next-after-this simple thing - # {n,m} times, set parenths. - CURLYM no 2 Match this medium-complex thing {n,m} times. - CURLYX sv 2 Match this complex thing {n,m} times. - - # This terminator creates a loop structure for CURLYX - WHILEM no Do curly processing and see if rest matches. - - # OPEN,CLOSE,GROUPP ...are numbered at compile time. - OPEN num 1 Mark this point in input as start of #n. - CLOSE num 1 Analogous to OPEN. - - REF num 1 Match some already matched string - REFF num 1 Match already matched string, folded - REFFL num 1 Match already matched string, folded in loc. - - # grouping assertions - IFMATCH off 1 2 Succeeds if the following matches. - UNLESSM off 1 2 Fails if the following matches. - SUSPEND off 1 1 "Independent" sub-RE. - IFTHEN off 1 1 Switch, should be preceeded by switcher . - GROUPP num 1 Whether the group matched. - - # Support for long RE - LONGJMP off 1 1 Jump far away. - BRANCHJ off 1 1 BRANCH with long offset. - - # The heavy worker - EVAL evl 1 Execute some Perl code. - - # Modifiers - MINMOD no Next operator is not greedy. - LOGICAL no Next opcode should set the flag only. - - # This is not used yet - RENUM off 1 1 Group with independently numbered parens. - - # This is not really a node, but an optimized away piece of a "long" node. - # To simplify debugging output, we mark it as if it were a node - OPTIMIZED off Placeholder for dump. - -=head2 Run-time output - -First of all, when doing a match, one may get no run-time output even -if debugging is enabled. 
this means that the RE engine was never -entered, all of the job was done by the optimizer. - -If RE engine was entered, the output may look like this: - - Matching `[bc]d(ef*g)+h[ij]k$' against `abcdefg__gh__' - Setting an EVAL scope, savestack=3 - 2 <ab> <cdefg__gh_> | 1: ANYOF - 3 <abc> <defg__gh_> | 11: EXACT <d> - 4 <abcd> <efg__gh_> | 13: CURLYX {1,32767} - 4 <abcd> <efg__gh_> | 26: WHILEM - 0 out of 1..32767 cc=effff31c - 4 <abcd> <efg__gh_> | 15: OPEN1 - 4 <abcd> <efg__gh_> | 17: EXACT <e> - 5 <abcde> <fg__gh_> | 19: STAR - EXACT <f> can match 1 times out of 32767... - Setting an EVAL scope, savestack=3 - 6 <bcdef> <g__gh__> | 22: EXACT <g> - 7 <bcdefg> <__gh__> | 24: CLOSE1 - 7 <bcdefg> <__gh__> | 26: WHILEM - 1 out of 1..32767 cc=effff31c - Setting an EVAL scope, savestack=12 - 7 <bcdefg> <__gh__> | 15: OPEN1 - 7 <bcdefg> <__gh__> | 17: EXACT <e> - restoring \1 to 4(4)..7 - failed, try continuation... - 7 <bcdefg> <__gh__> | 27: NOTHING - 7 <bcdefg> <__gh__> | 28: EXACT <h> - failed... - failed... - -The most significant information in the output is about the particular I<node> -of the compiled RE which is currently being tested against the target string. -The format of these lines is - -C< >I<STRING-OFFSET> <I<PRE-STRING>> <I<POST-STRING>> |I<ID>: I<TYPE> - -The I<TYPE> info is indented with respect to the backtracking level. -Other incidental information appears interspersed within. - -=cut +If you're in a slow syscall (like C<wait>ing, C<accept>ing, or C<read>ing +from your keyboard or a socket) and haven't set up your own C<$SIG{INT}> +handler, then you won't be able to CTRL-C your way back to the debugger, +because the debugger's own C<$SIG{INT}> handler doesn't understand that +it needs to raise an exception to longjmp(3) out of slow syscalls. 
diff --git a/contrib/perl5/pod/perldelta.pod b/contrib/perl5/pod/perldelta.pod index a0af1e1..4a1a142 100644 --- a/contrib/perl5/pod/perldelta.pod +++ b/contrib/perl5/pod/perldelta.pod @@ -1,983 +1,2916 @@ =head1 NAME -perldelta - what's new for perl5.005 +perldelta - what's new for perl v5.6.0 =head1 DESCRIPTION -This document describes differences between the 5.004 release and this one. +This document describes differences between the 5.005 release and this one. -=head1 About the new versioning system +=head1 Core Enhancements + +=head2 Interpreter cloning, threads, and concurrency + +Perl 5.005_63 introduces the beginnings of support for running multiple +interpreters concurrently in different threads. In conjunction with +the perl_clone() API call, which can be used to selectively duplicate +the state of any given interpreter, it is possible to compile a +piece of code once in an interpreter, clone that interpreter +one or more times, and run all the resulting interpreters in distinct +threads. + +On the Windows platform, this feature is used to emulate fork() at the +interpreter level. See L<perlfork> for details about that. + +This feature is still in evolution. It is eventually meant to be used +to selectively clone a subroutine and data reachable from that +subroutine in a separate interpreter and run the cloned subroutine +in a separate thread. Since there is no shared data between the +interpreters, little or no locking will be needed (unless parts of +the symbol table are explicitly shared). This is obviously intended +to be an easy-to-use replacement for the existing threads support. + +Support for cloning interpreters and interpreter concurrency can be +enabled using the -Dusethreads Configure option (see win32/Makefile for +how to enable it on Windows.) The resulting perl executable will be +functionally identical to one that was built with -Dmultiplicity, but +the perl_clone() API call will only be available in the former. 
+ +-Dusethreads enables the cpp macro USE_ITHREADS by default, which in turn +enables Perl source code changes that provide a clear separation between +the op tree and the data it operates with. The former is immutable, and +can therefore be shared between an interpreter and all of its clones, +while the latter is considered local to each interpreter, and is therefore +copied for each clone. + +Note that building Perl with the -Dusemultiplicity Configure option +is adequate if you wish to run multiple B<independent> interpreters +concurrently in different threads. -Dusethreads only provides the +additional functionality of the perl_clone() API call and other +support for running B<cloned> interpreters concurrently. + + NOTE: This is an experimental feature. Implementation details are + subject to change. + +=head2 Lexically scoped warning categories + +You can now control the granularity of warnings emitted by perl at a finer +level using the C<use warnings> pragma. L<warnings> and L<perllexwarn> +have copious documentation on this feature. + +=head2 Unicode and UTF-8 support + +Perl now uses UTF-8 as its internal representation for character +strings. The C<utf8> and C<bytes> pragmas are used to control this support +in the current lexical scope. See L<perlunicode>, L<utf8> and L<bytes> for +more information. + +This feature is expected to evolve quickly to support some form of I/O +disciplines that can be used to specify the kind of input and output data +(bytes or characters). Until that happens, additional modules from CPAN +will be needed to complete the toolkit for dealing with Unicode. + + NOTE: This should be considered an experimental feature. Implementation + details are subject to change. + +=head2 Support for interpolating named characters + +The new C<\N> escape interpolates named characters within strings. +For example, C<"Hi! \N{WHITE SMILING FACE}"> evaluates to a string +with a unicode smiley face at the end. 
+ +=head2 "our" declarations + +An "our" declaration introduces a value that can be best understood +as a lexically scoped symbolic alias to a global variable in the +package that was current where the variable was declared. This is +mostly useful as an alternative to the C<vars> pragma, but also provides +the opportunity to introduce typing and other attributes for such +variables. See L<perlfunc/our>. + +=head2 Support for strings represented as a vector of ordinals + +Literals of the form C<v1.2.3.4> are now parsed as a string composed +of characters with the specified ordinals. This is an alternative, more +readable way to construct (possibly unicode) strings instead of +interpolating characters, as in C<"\x{1}\x{2}\x{3}\x{4}">. The leading +C<v> may be omitted if there are more than two ordinals, so C<1.2.3> is +parsed the same as C<v1.2.3>. + +Strings written in this form are also useful to represent version "numbers". +It is easy to compare such version "numbers" (which are really just plain +strings) using any of the usual string comparison operators C<eq>, C<ne>, +C<lt>, C<gt>, etc., or perform bitwise string operations on them using C<|>, +C<&>, etc. + +In conjunction with the new C<$^V> magic variable (which contains +the perl version as a string), such literals can be used as a readable way +to check if you're running a particular version of Perl: + + # this will parse in older versions of Perl also + if ($^V and $^V gt v5.6.0) { + # new features supported + } -Perl is now developed on two tracks: a maintenance track that makes -small, safe updates to released production versions with emphasis on -compatibility; and a development track that pursues more aggressive -evolution. Maintenance releases (which should be considered production -quality) have subversion numbers that run from C<1> to C<49>, and -development releases (which should be considered "alpha" quality) run -from C<50> to C<99>. 
+C<require> and C<use> also have some special magic to support such literals. +They will be interpreted as a version rather than as a module name: -Perl 5.005 is the combined product of the new dual-track development -scheme. + require v5.6.0; # croak if $^V lt v5.6.0 + use v5.6.0; # same, but croaks at compile-time -=head1 Incompatible Changes +Alternatively, the C<v> may be omitted if there is more than one dot: + + require 5.6.0; + use 5.6.0; + +Also, C<sprintf> and C<printf> support the Perl-specific format flag C<%v> +to print ordinals of characters in arbitrary strings: + + printf "v%vd", $^V; # prints current version, such as "v5.5.650" + printf "%*vX", ":", $addr; # formats IPv6 address + printf "%*vb", " ", $bits; # displays bitstring + +See L<perldata/"Scalar value constructors"> for additional information. + +=head2 Improved Perl version numbering system + +Beginning with Perl version 5.6.0, the version number convention has been +changed to a "dotted integer" scheme that is more commonly found in open +source projects. + +Maintenance versions of v5.6.0 will be released as v5.6.1, v5.6.2 etc. +The next development series following v5.6.0 will be numbered v5.7.x, +beginning with v5.7.0, and the next major production release following +v5.6.0 will be v5.8.0. + +The English module now sets $PERL_VERSION to $^V (a string value) rather +than C<$]> (a numeric value). (This is a potential incompatibility. +Send us a report via perlbug if you are affected by this.) + +The v1.2.3 syntax is also now legal in Perl. +See L<Support for strings represented as a vector of ordinals> for more on that. -=head2 WARNING: This version is not binary compatible with Perl 5.004. +To cope with the new versioning system's use of at least three significant +digits for each version component, the method used for incrementing the +subversion number has also changed slightly. 
We assume that versions older +than v5.6.0 have been incrementing the subversion component in multiples of +10. Versions after v5.6.0 will increment them by 1. Thus, using the new +notation, 5.005_03 is the "same" as v5.5.30, and the first maintenance +version following v5.6.0 will be v5.6.1 (which should be read as being +equivalent to a floating point value of 5.006_001 in the older format, +stored in C<$]>). -Starting with Perl 5.004_50 there were many deep and far-reaching changes -to the language internals. If you have dynamically loaded extensions -that you built under perl 5.003 or 5.004, you can continue to use them -with 5.004, but you will need to rebuild and reinstall those extensions -to use them 5.005. See L<INSTALL> for detailed instructions on how to -upgrade. +=head2 New syntax for declaring subroutine attributes -=head2 Default installation structure has changed +Formerly, if you wanted to mark a subroutine as being a method call or +as requiring an automatic lock() when it is entered, you had to declare +that with a C<use attrs> pragma in the body of the subroutine. +That can now be accomplished with declaration syntax, like this: -The new Configure defaults are designed to allow a smooth upgrade from -5.004 to 5.005, but you should read L<INSTALL> for a detailed -discussion of the changes in order to adapt them to your system. + sub mymethod : locked method ; + ... + sub mymethod : locked method { + ... + } + + sub othermethod :locked :method ; + ... + sub othermethod :locked :method { + ... + } + + +(Note how only the first C<:> is mandatory, and whitespace surrounding +the C<:> is optional.) + +F<AutoSplit.pm> and F<SelfLoader.pm> have been updated to keep the attributes +with the stubs they provide. See L<attributes>. 
-=head2 Perl Source Compatibility +=head2 File and directory handles can be autovivified + +Similar to how constructs such as C<< $x->[0] >> autovivify a reference, +handle constructors (open(), opendir(), pipe(), socketpair(), sysopen(), +socket(), and accept()) now autovivify a file or directory handle +if the handle passed to them is an uninitialized scalar variable. This +allows the constructs such as C<open(my $fh, ...)> and C<open(local $fh,...)> +to be used to create filehandles that will conveniently be closed +automatically when the scope ends, provided there are no other references +to them. This largely eliminates the need for typeglobs when opening +filehandles that must be passed around, as in the following example: + + sub myopen { + open my $fh, "@_" + or die "Can't open '@_': $!"; + return $fh; + } + + { + my $f = myopen("</etc/motd"); + print <$f>; + # $f implicitly closed here + } -When none of the experimental features are enabled, there should be -very few user-visible Perl source compatibility issues. +=head2 open() with more than two arguments -If threads are enabled, then some caveats apply. C<@_> and C<$_> become -lexical variables. The effect of this should be largely transparent to -the user, but there are some boundary conditions under which user will -need to be aware of the issues. For example, C<local(@_)> results in -a "Can't localize lexical variable @_ ..." message. This may be enabled -in a future version. +If open() is passed three arguments instead of two, the second argument +is used as the mode and the third argument is taken to be the file name. +This is primarily useful for protecting against unintended magic behavior +of the traditional two-argument form. See L<perlfunc/open>. -Some new keywords have been introduced. These are generally expected to -have very little impact on compatibility. See L<New C<INIT> keyword>, -L<New C<lock> keyword>, and L<New C<qr//> operator>. 
+=head2 64-bit support -Certain barewords are now reserved. Use of these will provoke a warning -if you have asked for them with the C<-w> switch. -See L<C<our> is now a reserved word>. +Any platform that has 64-bit integers either -=head2 C Source Compatibility + (1) natively as longs or ints + (2) via special compiler flags + (3) using long long or int64_t -There have been a large number of changes in the internals to support -the new features in this release. +is able to use "quads" (64-bit integers) as follows: =over 4 -=item Core sources now require ANSI C compiler +=item * -An ANSI C compiler is now B<required> to build perl. See F<INSTALL>. +constants (decimal, hexadecimal, octal, binary) in the code -=item All Perl global variables must now be referenced with an explicit prefix +=item * -All Perl global variables that are visible for use by extensions now -have a C<PL_> prefix. New extensions should C<not> refer to perl globals -by their unqualified names. To preserve sanity, we provide limited -backward compatibility for globals that are being widely used like -C<sv_undef> and C<na> (which should now be written as C<PL_sv_undef>, -C<PL_na> etc.) +arguments to oct() and hex() -If you find that your XS extension does not compile anymore because a -perl global is not visible, try adding a C<PL_> prefix to the global -and rebuild. +=item * -It is strongly recommended that all functions in the Perl API that don't -begin with C<perl> be referenced with a C<Perl_> prefix. The bare function -names without the C<Perl_> prefix are supported with macros, but this -support may cease in a future release. +arguments to print(), printf() and sprintf() (flag prefixes ll, L, q) -See L<perlguts/"API LISTING">. +=item * -=item Enabling threads has source compatibility issues +printed as such -Perl built with threading enabled requires extensions to use the new -C<dTHR> macro to initialize the handle to access per-thread data. 
-If you see a compiler error that talks about the variable C<thr> not -being declared (when building a module that has XS code), you need -to add C<dTHR;> at the beginning of the block that elicited the error. +=item * -The API function C<perl_get_sv("@",FALSE)> should be used instead of -directly accessing perl globals as C<GvSV(errgv)>. The API call is -backward compatible with existing perls and provides source compatibility -with threading is enabled. +pack() and unpack() "q" and "Q" formats -See L<"C Source Compatibility"> for more information. +=item * + +in basic arithmetics: + - * / % (NOTE: operating close to the limits +of the integer values may produce surprising results) + +=item * + +in bit arithmetics: & | ^ ~ << >> (NOTE: these used to be forced +to be 32 bits wide but now operate on the full native width.) + +=item * + +vec() =back -=head2 Binary Compatibility +Note that unless you have the case (1) you will have to configure +and compile Perl using the -Duse64bitint Configure flag. -This version is NOT binary compatible with older versions. All extensions -will need to be recompiled. Further binaries built with threads enabled -are incompatible with binaries built without. This should largely be -transparent to the user, as all binary incompatible configurations have -their own unique architecture name, and extension binaries get installed at -unique locations. This allows coexistence of several configurations in -the same directory hierarchy. See F<INSTALL>. + NOTE: The Configure flags -Duselonglong and -Duse64bits have been + deprecated. Use -Duse64bitint instead. -=head2 Security fixes may affect compatibility +There are actually two modes of 64-bitness: the first one is achieved +using Configure -Duse64bitint and the second one using Configure +-Duse64bitall. The difference is that the first one is minimal and +the second one maximal. The first works in more places than the second. -A few taint leaks and taint omissions have been corrected.
This may lead -to "failure" of scripts that used to work with older versions. Compiling -with -DINCOMPLETE_TAINTS provides a perl with minimal amounts of changes -to the tainting behavior. But note that the resulting perl will have -known insecurities. +The C<use64bitint> does only as much as is required to get 64-bit +integers into Perl (this may mean, for example, using "long longs") +while your memory may still be limited to 2 gigabytes (because your +pointers could still be 32-bit). Note that the name C<64bitint> does +not imply that your C compiler will be using 64-bit C<int>s (it might, +but it doesn't have to): the C<use64bitint> means that you will be +able to have 64 bits wide scalar values. -Oneliners with the C<-e> switch do not create temporary files anymore. +The C<use64bitall> goes all the way by attempting to switch also +integers (if it can), longs (and pointers) to being 64-bit. This may +create an even more binary incompatible Perl than -Duse64bitint: the +resulting executable may not run at all in a 32-bit box, or you may +have to reboot/reconfigure/rebuild your operating system to be 64-bit +aware. -=head2 Relaxed new mandatory warnings introduced in 5.004 +Natively 64-bit systems like Alpha and Cray need neither -Duse64bitint +nor -Duse64bitall. -Many new warnings that were introduced in 5.004 have been made -optional. Some of these warnings are still present, but perl's new -features make them less often a problem. See L<New Diagnostics>. +Last but not least: note that due to Perl's habit of always using +floating point numbers, the quads are still not true integers. +When quads overflow their limits (0...18_446_744_073_709_551_615 unsigned, +-9_223_372_036_854_775_808...9_223_372_036_854_775_807 signed), they +are silently promoted to floating point numbers, after which they will +start losing precision (in their lower digits). -=head2 Licensing + NOTE: 64-bit support is still experimental on most platforms. 
+ Existing support only covers the LP64 data model. In particular, the + LLP64 data model is not yet supported. 64-bit libraries and system + APIs on many platforms have not stabilized--your mileage may vary. -Perl has a new Social Contract for contributors. See F<Porting/Contract>. +=head2 Large file support -The license included in much of the Perl documentation has changed. -Most of the Perl documentation was previously under the implicit GNU -General Public License or the Artistic License (at the user's choice). -Now much of the documentation unambigously states the terms under which -it may be distributed. Those terms are in general much less restrictive -than the GNU GPL. See L<perl> and the individual perl man pages listed -therein. +If you have filesystems that support "large files" (files larger than +2 gigabytes), you may now also be able to create and access them from +Perl. -=head1 Core Changes + NOTE: The default action is to enable large file support, if + available on the platform. +If the large file support is on, and you have a Fcntl constant +O_LARGEFILE, the O_LARGEFILE is automatically added to the flags +of sysopen(). -=head2 Threads +Beware that unless your filesystem also supports "sparse files" seeking +to umpteen petabytes may be inadvisable. -WARNING: Threading is considered an B<experimental> feature. Details of the -implementation may change without notice. There are known limitations -and some bugs. These are expected to be fixed in future versions. +Note that in addition to requiring a proper file system to do large +files you may also need to adjust your per-process (or your +per-system, or per-process-group, or per-user-group) maximum filesize +limits before running Perl scripts that try to handle large files, +especially if you intend to write such files. -See L<README.threads>. 
+Finally, in addition to your process/process group maximum filesize +limits, you may have quota limits on your filesystems that stop you +(your user id or your user group id) from using large files. -Mach cthreads (NEXTSTEP, OPENSTEP, Rhapsody) are now supported by -the Thread extension. +Adjusting your process/user/group/file system/operating system limits +is outside the scope of Perl core language. For process limits, you +may try increasing the limits using your shell's limits/limit/ulimit +command before running Perl. The BSD::Resource extension (not +included with the standard Perl distribution) may also be of use, it +offers the getrlimit/setrlimit interface that can be used to adjust +process resource usage limits, including the maximum filesize limit. -=head2 Compiler +=head2 Long doubles -WARNING: The Compiler and related tools are considered B<experimental>. -Features may change without notice, and there are known limitations -and bugs. Since the compiler is fully external to perl, the default -configuration will build and install it. +In some systems you may be able to use long doubles to enhance the +range and precision of your double precision floating point numbers +(that is, Perl's numbers). Use Configure -Duselongdouble to enable +this support (if it is available). -The Compiler produces three different types of transformations of a -perl program. The C backend generates C code that captures perl's state -just before execution begins. It eliminates the compile-time overheads -of the regular perl interpreter, but the run-time performance remains -comparatively the same. The CC backend generates optimized C code -equivalent to the code path at run-time. The CC backend has greater -potential for big optimizations, but only a few optimizations are -implemented currently. The Bytecode backend generates a platform -independent bytecode representation of the interpreter's state -just before execution. 
Thus, the Bytecode back end also eliminates -much of the compilation overhead of the interpreter. +=head2 "more bits" -The compiler comes with several valuable utilities. +You can "Configure -Dusemorebits" to turn on both the 64-bit support +and the long double support. -C<B::Lint> is an experimental module to detect and warn about suspicious -code, especially the cases that the C<-w> switch does not detect. +=head2 Enhanced support for sort() subroutines -C<B::Deparse> can be used to demystify perl code, and understand -how perl optimizes certain constructs. +Perl subroutines with a prototype of C<($$)>, and XSUBs in general, can +now be used as sort subroutines. In either case, the two elements to +be compared are passed as normal parameters in @_. See L<perlfunc/sort>. -C<B::Xref> generates cross reference reports of all definition and use -of variables, subroutines and formats in a program. +For unprototyped sort subroutines, the historical behavior of passing +the elements to be compared as the global variables $a and $b remains +unchanged. -C<B::Showlex> show the lexical variables used by a subroutine or file -at a glance. +=head2 C<sort $coderef @foo> allowed -C<perlcc> is a simple frontend for compiling perl. +sort() did not accept a subroutine reference as the comparison +function in earlier versions. This is now permitted. -See C<ext/B/README>, L<B>, and the respective compiler modules. +=head2 File globbing implemented internally -=head2 Regular Expressions +Perl now uses the File::Glob implementation of the glob() operator +automatically. This avoids using an external csh process and the +problems associated with it. -Perl's regular expression engine has been seriously overhauled, and -many new constructs are supported. Several bugs have been fixed. + NOTE: This is currently an experimental feature. Interfaces and + implementation are subject to change. 
-Here is an itemized summary: +=head2 Support for CHECK blocks -=over 4 +In addition to C<BEGIN>, C<INIT>, C<END>, C<DESTROY> and C<AUTOLOAD>, +subroutines named C<CHECK> are now special. These are queued up during +compilation and behave similar to END blocks, except they are called at +the end of compilation rather than at the end of execution. They cannot +be called directly. -=item Many new and improved optimizations +=head2 POSIX character class syntax [: :] supported -Changes in the RE engine: +For example to match alphabetic characters use /[[:alpha:]]/. +See L<perlre> for details. - Unneeded nodes removed; - Substrings merged together; - New types of nodes to process (SUBEXPR)* and similar expressions - quickly, used if the SUBEXPR has no side effects and matches - strings of the same length; - Better optimizations by lookup for constant substrings; - Better search for constants substrings anchored by $ ; +=head2 Better pseudo-random number generator -Changes in Perl code using RE engine: +In 5.005_0x and earlier, perl's rand() function used the C library +rand(3) function. As of 5.005_52, Configure tests for drand48(), +random(), and rand() (in that order) and picks the first one it finds. - More optimizations to s/longer/short/; - study() was not working; - /blah/ may be optimized to an analogue of index() if $& $` $' not seen; - Unneeded copying of matched-against string removed; - Only matched part of the string is copying if $` $' were not seen; +These changes should result in better random numbers from rand(). -=item Many bug fixes +=head2 Improved C<qw//> operator -Note that only the major bug fixes are listed here. See F<Changes> for others. +The C<qw//> operator is now evaluated at compile time into a true list +instead of being replaced with a run time call to C<split()>. This +removes the confusing misbehaviour of C<qw//> in scalar context, which +had inherited that behaviour from split(). - Backtracking might not restore start of $3.
- No feedback if max count for * or + on "complex" subexpression - was reached, similarly (but at compile time) for {3,34567} - Primitive restrictions on max count introduced to decrease a - possibility of a segfault; - (ZERO-LENGTH)* could segfault; - (ZERO-LENGTH)* was prohibited; - Long REs were not allowed; - /RE/g could skip matches at the same position after a - zero-length match; +Thus: -=item New regular expression constructs + $foo = ($bar) = qw(a b c); print "$foo|$bar\n"; -The following new syntax elements are supported: +now correctly prints "3|a", instead of "2|a". - (?<=RE) - (?<!RE) - (?{ CODE }) - (?i-x) - (?i:RE) - (?(COND)YES_RE|NO_RE) - (?>RE) - \z +=head2 Better worst-case behavior of hashes -=item New operator for precompiled regular expressions +Small changes in the hashing algorithm have been implemented in +order to improve the distribution of lower order bits in the +hashed value. This is expected to yield better performance on +keys that are repeated sequences. -See L<New C<qr//> operator>. +=head2 pack() format 'Z' supported -=item Other improvements +The new format type 'Z' is useful for packing and unpacking null-terminated +strings. See L<perlfunc/"pack">. - Better debugging output (possibly with colors), - even from non-debugging Perl; - RE engine code now looks like C, not like assembler; - Behaviour of RE modifiable by `use re' directive; - Improved documentation; - Test suite significantly extended; - Syntax [:^upper:] etc., reserved inside character classes; +=head2 pack() format modifier '!' supported -=item Incompatible changes +The new format type modifier '!' is useful for packing and unpacking +native shorts, ints, and longs. See L<perlfunc/"pack">. - (?i) localized inside enclosing group; - $( is not interpolated into RE any more; - /RE/g may match at the same position (with non-zero length) - after a zero-length match (bug fix).
+=head2 pack() and unpack() support counted strings -=back +The template character '/' can be used to specify a counted string +type to be packed or unpacked. See L<perlfunc/"pack">. -See L<perlre> and L<perlop>. +=head2 Comments in pack() templates -=head2 Improved malloc() +The '#' character in a template introduces a comment up to +end of the line. This facilitates documentation of pack() +templates. -See banner at the beginning of C<malloc.c> for details. +=head2 Weak references -=head2 Quicksort is internally implemented +In previous versions of Perl, you couldn't cache objects so as +to allow them to be deleted if the last reference from outside +the cache is deleted. The reference in the cache would hold a +reference count on the object and the objects would never be +destroyed. -Perl now contains its own highly optimized qsort() routine. The new qsort() -is resistant to inconsistent comparison functions, so Perl's C<sort()> will -not provoke coredumps any more when given poorly written sort subroutines. -(Some C library C<qsort()>s that were being used before used to have this -problem.) In our testing, the new C<qsort()> required the minimal number -of pair-wise compares on average, among all known C<qsort()> implementations. +Another familiar problem is with circular references. When an +object references itself, its reference count would never go +down to zero, and it would not get destroyed until the program +is about to exit. -See C<perlfunc/sort>. +Weak references solve this by allowing you to "weaken" any +reference, that is, make it not count towards the reference count. +When the last non-weak reference to an object is deleted, the object +is destroyed and all the weak references to the object are +automatically undef-ed. -=head2 Reliable signals +To use this feature, you need the WeakRef package from CPAN, which +contains additional documentation. 
-Perl's signal handling is susceptible to random crashes, because signals -arrive asynchronously, and the Perl runtime is not reentrant at arbitrary -times. + NOTE: This is an experimental feature. Details are subject to change. -However, one experimental implementation of reliable signals is available -when threads are enabled. See C<Thread::Signal>. Also see F<INSTALL> for -how to build a Perl capable of threads. +=head2 Binary numbers supported -=head2 Reliable stack pointers +Binary numbers are now supported as literals, in s?printf formats, and +C<oct()>: -The internals now reallocate the perl stack only at predictable times. -In particular, magic calls never trigger reallocations of the stack, -because all reentrancy of the runtime is handled using a "stack of stacks". -This should improve reliability of cached stack pointers in the internals -and in XSUBs. + $answer = 0b101010; + printf "The answer is: %b\n", oct("0b101010"); -=head2 More generous treatment of carriage returns +=head2 Lvalue subroutines -Perl used to complain if it encountered literal carriage returns in -scripts. Now they are mostly treated like whitespace within program text. -Inside string literals and here documents, literal carriage returns are -ignored if they occur paired with linefeeds, or get interpreted as whitespace -if they stand alone. This behavior means that literal carriage returns -in files should be avoided. You can get the older, more compatible (but -less generous) behavior by defining the preprocessor symbol -C<PERL_STRICT_CR> when building perl. Of course, all this has nothing -whatever to do with how escapes like C<\r> are handled within strings. +Subroutines can now return modifiable lvalues. +See L<perlsub/"Lvalue subroutines">. -Note that this doesn't somehow magically allow you to keep all text files -in DOS format. The generous treatment only applies to files that perl -itself parses. 
If your C compiler doesn't allow carriage returns in -files, you may still be unable to build modules that need a C compiler. + NOTE: This is an experimental feature. Details are subject to change. -=head2 Memory leaks +=head2 Some arrows may be omitted in calls through references -C<substr>, C<pos> and C<vec> don't leak memory anymore when used in lvalue -context. Many small leaks that impacted applications that embed multiple -interpreters have been fixed. +Perl now allows the arrow to be omitted in many constructs +involving subroutine calls through references. For example, +C<< $foo[10]->('foo') >> may now be written C<$foo[10]('foo')>. +This is rather similar to how the arrow may be omitted from +C<< $foo[10]->{'foo'} >>. Note however, that the arrow is still +required for C<< foo(10)->('bar') >>. -=head2 Better support for multiple interpreters +=head2 Boolean assignment operators are legal lvalues -The build-time option C<-DMULTIPLICITY> has had many of the details -reworked. Some previously global variables that should have been -per-interpreter now are. With care, this allows interpreters to call -each other. See the C<PerlInterp> extension on CPAN. +Constructs such as C<($a ||= 2) += 1> are now allowed. -=head2 Behavior of local() on array and hash elements is now well-defined +=head2 exists() is supported on subroutine names -See L<perlsub/"Temporary Values via local()">. +The exists() builtin now works on subroutine names. A subroutine +is considered to exist if it has been declared (even if implicitly). +See L<perlfunc/exists> for examples. -=head2 C<%!> is transparently tied to the L<Errno> module +=head2 exists() and delete() are supported on array elements -See L<perlvar>, and L<Errno>. +The exists() and delete() builtins now work on simple arrays as well. +The behavior is similar to that on hash elements. -=head2 Pseudo-hashes are supported +exists() can be used to check whether an array element has been +initialized. 
This avoids autovivifying array elements that don't exist. +If the array is tied, the EXISTS() method in the corresponding tied +package will be invoked. -See L<perlref>. +delete() may be used to remove an element from the array and return +it. The array element at that position returns to its uninitialized +state, so that testing for the same element with exists() will return +false. If the element happens to be the one at the end, the size of +the array also shrinks up to the highest element that tests true for +exists(), or 0 if none such is found. If the array is tied, the DELETE() +method in the corresponding tied package will be invoked. -=head2 C<EXPR foreach EXPR> is supported +See L<perlfunc/exists> and L<perlfunc/delete> for examples. -See L<perlsyn>. +=head2 Pseudo-hashes work better -=head2 Keywords can be globally overridden +Dereferencing some types of reference values in a pseudo-hash, +such as C<< $ph->{foo}[1] >>, was accidentally disallowed. This has +been corrected. -See L<perlsub>. +When applied to a pseudo-hash element, exists() now reports whether +the specified value exists, not merely if the key is valid. -=head2 C<$^E> is meaningful on Win32 +delete() now works on pseudo-hashes. When given a pseudo-hash element +or slice it deletes the values corresponding to the keys (but not the keys +themselves). See L<perlref/"Pseudo-hashes: Using an array as a hash">. -See L<perlvar>. +Pseudo-hash slices with constant keys are now optimized to array lookups +at compile-time. -=head2 C<foreach (1..1000000)> optimized +List assignments to pseudo-hash slices are now supported. -C<foreach (1..1000000)> is now optimized into a counting loop. It does -not try to allocate a 1000000-size list anymore. +The C<fields> pragma now provides ways to create pseudo-hashes, via +fields::new() and fields::phash(). See L<fields>. -=head2 C<Foo::> can be used as implicitly quoted package name + NOTE: The pseudo-hash data type continues to be experimental.
+ Limiting oneself to the interface elements provided by the + fields pragma will provide protection from any future changes. -Barewords caused unintuitive behavior when a subroutine with the same -name as a package happened to be defined. Thus, C<new Foo @args>, -use the result of the call to C<Foo()> instead of C<Foo> being treated -as a literal. The recommended way to write barewords in the indirect -object slot is C<new Foo:: @args>. Note that the method C<new()> is -called with a first argument of C<Foo>, not C<Foo::> when you do that. +=head2 Automatic flushing of output buffers -=head2 C<exists $Foo::{Bar::}> tests existence of a package +fork(), exec(), system(), qx//, and pipe open()s now flush buffers +of all files opened for output when the operation was attempted. This +mostly eliminates confusing buffering mishaps suffered by users unaware +of how Perl internally handles I/O. -It was impossible to test for the existence of a package without -actually creating it before. Now C<exists $Foo::{Bar::}> can be -used to test if the C<Foo::Bar> namespace has been created. +This is not supported on some platforms like Solaris where a suitably +correct implementation of fflush(NULL) isn't available. -=head2 Better locale support +=head2 Better diagnostics on meaningless filehandle operations -See L<perllocale>. +Constructs such as C<< open(<FH>) >> and C<< close(<FH>) >> +are compile time errors. Attempting to read from filehandles that +were opened only for writing will now produce warnings (just as +writing to read-only filehandles does). -=head2 Experimental support for 64-bit platforms +=head2 Where possible, buffered data discarded from duped input filehandle -Perl5 has always had 64-bit support on systems with 64-bit longs. -Starting with 5.005, the beginnings of experimental support for systems -with 32-bit long and 64-bit 'long long' integers has been added. 
-If you add -DUSE_LONG_LONG to your ccflags in config.sh (or manually -define it in perl.h) then perl will be built with 'long long' support. -There will be many compiler warnings, and the resultant perl may not -work on all systems. There are many other issues related to -third-party extensions and libraries. This option exists to allow -people to work on those issues. +C<< open(NEW, "<&OLD") >> now attempts to discard any data that +was previously read and buffered in C<OLD> before duping the handle. +On platforms where doing this is allowed, the next read operation +on C<NEW> will return the same data as the corresponding operation +on C<OLD>. Formerly, it would have returned the data from the start +of the following disk block instead. -=head2 prototype() returns useful results on builtins +=head2 eof() has the same old magic as <> -See L<perlfunc/prototype>. +C<eof()> would return true if no attempt to read from C<< <> >> had +yet been made. C<eof()> has been changed to have a little magic of its +own, it now opens the C<< <> >> files. -=head2 Extended support for exception handling +=head2 binmode() can be used to set :crlf and :raw modes -C<die()> now accepts a reference value, and C<$@> gets set to that -value in exception traps. This makes it possible to propagate -exception objects. This is an undocumented B<experimental> feature. +binmode() now accepts a second argument that specifies a discipline +for the handle in question. The two pseudo-disciplines ":raw" and +":crlf" are currently supported on DOS-derivative platforms. +See L<perlfunc/"binmode"> and L<open>. -=head2 Re-blessing in DESTROY() supported for chaining DESTROY() methods +=head2 C<-T> filetest recognizes UTF-8 encoded files as "text" -See L<perlobj/Destructors>. +The algorithm used for the C<-T> filetest has been enhanced to +correctly identify UTF-8 content as "text". 
-=head2 All C<printf> format conversions are handled internally +=head2 system(), backticks and pipe open now reflect exec() failure -See L<perlfunc/printf>. +On Unix and similar platforms, system(), qx() and open(FOO, "cmd |") +etc., are implemented via fork() and exec(). When the underlying +exec() fails, earlier versions did not report the error properly, +since the exec() happened to be in a different process. -=head2 New C<INIT> keyword +The child process now communicates with the parent about the +error in launching the external command, which allows these +constructs to return with their usual error value and set $!. -C<INIT> subs are like C<BEGIN> and C<END>, but they get run just before -the perl runtime begins execution. e.g., the Perl Compiler makes use of -C<INIT> blocks to initialize and resolve pointers to XSUBs. +=head2 Improved diagnostics -=head2 New C<lock> keyword +Line numbers are no longer suppressed (under most likely circumstances) +during the global destruction phase. -The C<lock> keyword is the fundamental synchronization primitive -in threaded perl. When threads are not enabled, it is currently a noop. +Diagnostics emitted from code running in threads other than the main +thread are now accompanied by the thread ID. -To minimize impact on source compatibility this keyword is "weak", i.e., any -user-defined subroutine of the same name overrides it, unless a C<use Thread> -has been seen. +Embedded null characters in diagnostics now actually show up. They +used to truncate the message in prior versions. -=head2 New C<qr//> operator +$foo::a and $foo::b are now exempt from "possible typo" warnings only +if sort() is encountered in package C<foo>. -The C<qr//> operator, which is syntactically similar to the other quote-like -operators, is used to create precompiled regular expressions. This compiled -form can now be explicitly passed around in variables, and interpolated in -other regular expressions. See L<perlop>. 
+Unrecognized alphabetic escapes encountered when parsing quote +constructs now generate a warning, since they may take on new +semantics in later versions of Perl. -=head2 C<our> is now a reserved word +Many diagnostics now report the internal operation in which the warning +was provoked, like so: -Calling a subroutine with the name C<our> will now provoke a warning when -using the C<-w> switch. + Use of uninitialized value in concatenation (.) at (eval 1) line 1. + Use of uninitialized value in print at (eval 1) line 1. -=head2 Tied arrays are now fully supported +Diagnostics that occur within eval may also report the file and line +number where the eval is located, in addition to the eval sequence +number and the line number within the evaluated text itself. For +example: -See L<Tie::Array>. + Not enough arguments for scalar at (eval 4)[newlib/perl5db.pl:1411] line 2, at EOF -=head2 Tied handles support is better +=head2 Diagnostics follow STDERR -Several missing hooks have been added. There is also a new base class for -TIEARRAY implementations. See L<Tie::Array>. +Diagnostic output now goes to whichever file the C<STDERR> handle +is pointing at, instead of always going to the underlying C runtime +library's C<stderr>. -=head2 4th argument to substr +=head2 More consistent close-on-exec behavior -substr() can now both return and replace in one operation. The optional -4th argument is the replacement string. See L<perlfunc/substr>. +On systems that support a close-on-exec flag on filehandles, the +flag is now set for any handles created by pipe(), socketpair(), +socket(), and accept(), if that is warranted by the value of $^F +that may be in effect. Earlier versions neglected to set the flag +for handles created with these operators. See L<perlfunc/pipe>, +L<perlfunc/socketpair>, L<perlfunc/socket>, L<perlfunc/accept>, +and L<perlvar/$^F>.
-=head2 Negative LENGTH argument to splice +=head2 syswrite() ease-of-use -splice() with a negative LENGTH argument now work similar to what the -LENGTH did for substr(). Previously a negative LENGTH was treated as -0. See L<perlfunc/splice>. +The length argument of C<syswrite()> has become optional. -=head2 Magic lvalues are now more magical +=head2 Better syntax checks on parenthesized unary operators -When you say something like C<substr($x, 5) = "hi">, the scalar returned -by substr() is special, in that any modifications to it affect $x. -(This is called a 'magic lvalue' because an 'lvalue' is something on -the left side of an assignment.) Normally, this is exactly what you -would expect to happen, but Perl uses the same magic if you use substr(), -pos(), or vec() in a context where they might be modified, like taking -a reference with C<\> or as an argument to a sub that modifies C<@_>. -In previous versions, this 'magic' only went one way, but now changes -to the scalar the magic refers to ($x in the above example) affect the -magic lvalue too. For instance, this code now acts differently: +Expressions such as: - $x = "hello"; - sub printit { - $x = "g'bye"; - print $_[0], "\n"; - } - printit(substr($x, 0, 5)); + print defined(&foo,&bar,&baz); + print uc("foo","bar","baz"); + undef($foo,&bar); -In previous versions, this would print "hello", but it now prints "g'bye". +used to be accidentally allowed in earlier versions, and produced +unpredictable behaviour. Some produced ancillary warnings +when used in this way; others silently did the wrong thing. -=head2 E<lt>E<gt> now reads in records +The parenthesized forms of most unary operators that expect a single +argument now ensure that they are not called with more than one +argument, making the cases shown above syntax errors. The usual +behaviour of: -If C<$/> is a referenence to an integer, or a scalar that holds an integer, -E<lt>E<gt> will read in records instead of lines. 
For more info, see -L<perlvar/$/>. + print defined &foo, &bar, &baz; + print uc "foo", "bar", "baz"; + undef $foo, &bar; -=head2 pack() format 'Z' supported +remains unchanged. See L<perlop>. -The new format type 'Z' is useful for packing and unpacking null-terminated -strings. See L<perlfunc/"pack">. +=head2 Bit operators support full native integer width -=head1 Significant bug fixes +The bit operators (& | ^ ~ << >>) now operate on the full native +integral width (the exact size of which is available in $Config{ivsize}). +For example, if your platform is either natively 64-bit or if Perl +has been configured to use 64-bit integers, these operations apply +to 8 bytes (as opposed to 4 bytes on 32-bit platforms). +For portability, be sure to mask off the excess bits in the result of +unary C<~>, e.g., C<~$x & 0xffffffff>. -=head2 E<lt>HANDLEE<gt> on empty files +=head2 Improved security features -With C<$/> set to C<undef>, slurping an empty file returns a string of -zero length (instead of C<undef>, as it used to) for the first time the -HANDLE is read. Subsequent reads yield C<undef>. +More potentially unsafe operations taint their results for improved +security. -This means that the following will append "foo" to an empty file (it used -to not do anything before): +The C<passwd> and C<shell> fields returned by the getpwent(), getpwnam(), +and getpwuid() are now tainted, because the user can affect their own +encrypted password and login shell. - perl -0777 -pi -e 's/^/foo/' empty_file +The variable modified by shmread(), and messages returned by msgrcv() +(and its object-oriented interface IPC::SysV::Msg::rcv) are also tainted, +because other untrusted processes can modify messages and shared memory +segments for their own nefarious purposes. 
-Note that the behavior of: +=head2 More functional bareword prototype (*) - perl -pi -e 's/^/foo/' empty_file +Bareword prototypes have been rationalized to enable them to be used +to override builtins that accept barewords and interpret them in +a special way, such as C<require> or C<do>. -is unchanged (it continues to leave the file empty). +Arguments prototyped as C<*> will now be visible within the subroutine +as either a simple scalar or as a reference to a typeglob. +See L<perlsub/Prototypes>. -=head1 Supported Platforms +=head2 C<require> and C<do> may be overridden -Configure has many incremental improvements. Site-wide policy for building -perl can now be made persistent, via Policy.sh. Configure also records -the command-line arguments used in F<config.sh>. +C<require> and C<do 'file'> operations may be overridden locally +by importing subroutines of the same name into the current package +(or globally by importing them into the CORE::GLOBAL:: namespace). +Overriding C<require> will also affect C<use>, provided the override +is visible at compile-time. +See L<perlsub/"Overriding Built-in Functions">. -=head2 New Platforms +=head2 $^X variables may now have names longer than one character -BeOS is now supported. See L<README.beos>. +Formerly, $^X was synonymous with ${"\cX"}, but $^XY was a syntax +error. Now variable names that begin with a control character may be +arbitrarily long. However, for compatibility reasons, these variables +I<must> be written with explicit braces, as C<${^XY}> for example. +C<${^XYZ}> is synonymous with ${"\cXYZ"}. Variable names with more +than one control character, such as C<${^XY^Z}>, are illegal. -DOS is now supported under the DJGPP tools. See L<README.dos>. +The old syntax has not changed. As before, `^X' may be either a +literal control-X character or the two-character sequence `caret' plus +`X'. When braces are omitted, the variable name stops after the +control character.
Thus C<"$^XYZ"> continues to be synonymous with +C<$^X . "YZ"> as before. -GNU/Hurd is now supported. +As before, lexical variables may not have names beginning with control +characters. As before, variables whose names begin with a control +character are always forced to be in package `main'. All such variables +are reserved for future extensions, except those that begin with +C<^_>, which may be used by user programs and are guaranteed not to +acquire special meaning in any future version of Perl. -MiNT is now supported. See L<README.mint>. +=head2 New variable $^C reflects C<-c> switch -MPE/iX is now supported. See L<README.mpeix>. +C<$^C> has a boolean value that reflects whether perl is being run +in compile-only mode (i.e. via the C<-c> switch). Since +BEGIN blocks are executed under such conditions, this variable +enables perl code to determine whether actions that make sense +only during normal running are warranted. See L<perlvar>. -MVS (aka OS390, aka Open Edition) is now supported. See L<README.os390>. +=head2 New variable $^V contains Perl version as a string -Stratus VOS is now supported. See L<README.vos>. +C<$^V> contains the Perl version number as a string composed of +characters whose ordinals match the version numbers, i.e. v5.6.0. +This may be used in string comparisons. -=head2 Changes in existing support +See C<Support for strings represented as a vector of ordinals> for an +example. -Win32 support has been vastly enhanced. Support for Perl Object, a C++ -encapsulation of Perl. GCC and EGCS are now supported on Win32. -See F<README.win32>, aka L<perlwin32>. +=head2 Optional Y2K warnings -VMS configuration system has been rewritten. See L<README.vms>. +If Perl is built with the cpp macro C<PERL_Y2KWARN> defined, +it emits optional warnings when concatenating the number 19 +with another number. -The hints files for most Unix platforms have seen incremental improvements. +This behavior must be specifically enabled when running Configure. 
+See F<INSTALL> and F<README.Y2K>. =head1 Modules and Pragmata -=head2 New Modules +=head2 Modules -=over +=over 4 + +=item attributes + +While used internally by Perl as a pragma, this module also +provides a way to fetch subroutine and variable attributes. +See L<attributes>. =item B -Perl compiler and tools. See L<B>. +The Perl Compiler suite has been extensively reworked for this +release. More of the standard Perl testsuite passes when run +under the Compiler, but there is still a significant way to +go to achieve production quality compiled executables. + + NOTE: The Compiler suite remains highly experimental. The + generated code may not be correct, even if it manages to execute + without errors. + +=item Benchmark + +Overall, Benchmark results exhibit lower average error and better timing +accuracy. + +You can now run tests for I<n> seconds instead of guessing the right +number of tests to run: e.g., timethese(-5, ...) will run each +code for at least 5 CPU seconds. Zero as the "number of repetitions" +means "for at least 3 CPU seconds". The output format has also +changed. For example: + + use Benchmark;$x=3;timethese(-5,{a=>sub{$x*$x},b=>sub{$x**2}}) + +will now output something like this: + + Benchmark: running a, b, each for at least 5 CPU seconds... + a: 5 wallclock secs ( 5.77 usr + 0.00 sys = 5.77 CPU) @ 200551.91/s (n=1156516) + b: 4 wallclock secs ( 5.00 usr + 0.02 sys = 5.02 CPU) @ 159605.18/s (n=800686) + +New features: "each for at least N CPU seconds...", "wallclock secs", +and the "@ operations/CPU second (n=operations)". + +timethese() now returns a reference to a hash of Benchmark objects containing +the test results, keyed on the names of the tests. + +timethis() now returns the iterations field in the Benchmark result object +instead of 0. + +timethese(), timethis(), and the new cmpthese() (see below) can also take +a format specifier of 'none' to suppress output.
+ +A new function countit() is just like timeit() except that it takes a +TIME instead of a COUNT. + +A new function cmpthese() prints a chart comparing the results of each test +returned from a timethese() call. For each possible pair of tests, the +percentage speed difference (iters/sec or seconds/iter) is shown. + +For other details, see L<Benchmark>. + +=item ByteLoader + +The ByteLoader is a dedicated extension to generate and run +Perl bytecode. See L<ByteLoader>. + +=item constant + +References can now be used. + +The new version also allows a leading underscore in constant names, but +disallows a double leading underscore (as in "__LINE__"). Some other names +are disallowed or warned against, including BEGIN, END, etc. Some names +which were forced into main:: used to fail silently in some cases; now they're +fatal (outside of main::) and an optional warning (inside of main::). +The ability to detect whether a constant had been set with a given name has +been added. + +See L<constant>. + +=item charnames + +This pragma implements the C<\N> string escape. See L<charnames>. =item Data::Dumper -A module to pretty print Perl data. See L<Data::Dumper>. +A C<Maxdepth> setting can be specified to avoid venturing +too deeply into deep data structures. See L<Data::Dumper>. + +The XSUB implementation of Dump() is now automatically called if the +C<Useqq> setting is not in use. + +Dumping C<qr//> objects works correctly. + +=item DB + +C<DB> is an experimental module that exposes a clean abstraction +to Perl's debugging API. + +=item DB_File + +DB_File can now be built with Berkeley DB versions 1, 2 or 3. +See C<ext/DB_File/Changes>. + +=item Devel::DProf + +Devel::DProf, a Perl source code profiler has been added. See +L<Devel::DProf> and L<dprofpp>. + +=item Devel::Peek + +The Devel::Peek module provides access to the internal representation +of Perl variables and data. It is a data debugging tool for the XS programmer. 
=item Dumpvalue -A module to dump perl values to the screen. See L<Dumpvalue>. +The Dumpvalue module provides screen dumps of Perl data. + +=item DynaLoader -=item Errno +DynaLoader now supports a dl_unload_file() function on platforms that +support unloading shared objects using dlclose(). -A module to look up errors more conveniently. See L<Errno>. +Perl can also optionally arrange to unload all extension shared objects +loaded by Perl. To enable this, build Perl with the Configure option +C<-Accflags=-DDL_UNLOAD_ALL_AT_EXIT>. (This may be useful if you are +using Apache with mod_perl.) + +=item English + +$PERL_VERSION now stands for C<$^V> (a string value) rather than for C<$]> +(a numeric value). + +=item Env + +Env now supports accessing environment variables like PATH as array +variables. + +=item Fcntl + +More Fcntl constants added: F_SETLK64, F_SETLKW64, O_LARGEFILE for +large file (more than 4GB) access (NOTE: the O_LARGEFILE is +automatically added to sysopen() flags if large file support has been +configured, as is the default), Free/Net/OpenBSD locking behaviour +flags F_FLOCK, F_POSIX, Linux F_SHLCK, and O_ACCMODE: the combined +mask of O_RDONLY, O_WRONLY, and O_RDWR. The seek()/sysseek() +constants SEEK_SET, SEEK_CUR, and SEEK_END are available via the +C<:seek> tag. The chmod()/stat() S_IF* constants and S_IS* functions +are available via the C<:mode> tag. + +=item File::Compare + +A compare_text() function has been added, which allows custom +comparison functions. See L<File::Compare>. + +=item File::Find + +File::Find now works correctly when the wanted() function is either +autoloaded or is a symbolic reference. + +A bug that caused File::Find to lose track of the working directory +when pruning top-level directories has been fixed. + +File::Find now also supports several other options to control its +behavior. It can follow symbolic links if the C<follow> option is +specified.
Enabling the C<no_chdir> option will make File::Find skip +changing the current directory when walking directories. The C<untaint> +flag can be useful when running with taint checks enabled. + +See L<File::Find>. + +=item File::Glob + +This extension implements BSD-style file globbing. By default, +it will also be used for the internal implementation of the glob() +operator. See L<File::Glob>. =item File::Spec -A portable API for file operations. +New methods have been added to the File::Spec module: devnull() returns +the name of the null device (/dev/null on Unix) and tmpdir() the name of +the temp directory (normally /tmp on Unix). There are now also methods +to convert between absolute and relative filenames: abs2rel() and +rel2abs(). For compatibility with operating systems that specify volume +names in file paths, the splitpath(), splitdir(), and catdir() methods +have been added. -=item ExtUtils::Installed +=item File::Spec::Functions -Query and manage installed modules. +The new File::Spec::Functions module provides a function interface +to the File::Spec module. Allows shorthand -=item ExtUtils::Packlist + $fullname = catfile($dir1, $dir2, $file); -Manipulate .packlist files. +instead of -=item Fatal + $fullname = File::Spec->catfile($dir1, $dir2, $file); -Make functions/builtins succeed or die. +=item Getopt::Long -=item IPC::SysV +Getopt::Long licensing has changed to allow the Perl Artistic License +as well as the GPL. It used to be GPL only, which got in the way of +non-GPL applications that wanted to use Getopt::Long. -Constants and other support infrastructure for System V IPC operations -in perl. +Getopt::Long encourages the use of Pod::Usage to produce help +messages. For example: -=item Test + use Getopt::Long; + use Pod::Usage; + my $man = 0; + my $help = 0; + GetOptions('help|?' => \$help, man => \$man) or pod2usage(2); + pod2usage(1) if $help; + pod2usage(-exitstatus => 0, -verbose => 2) if $man; -A framework for writing testsuites.
+ __END__ -=item Tie::Array + =head1 NAME -Base class for tied arrays. + sample - Using Getopt::Long and Pod::Usage -=item Tie::Handle + =head1 SYNOPSIS -Base class for tied handles. + sample [options] [file ...] -=item Thread + Options: + -help brief help message + -man full documentation -Perl thread creation, manipulation, and support. + =head1 OPTIONS -=item attrs + =over 8 -Set subroutine attributes. + =item B<-help> -=item fields + Prints a brief help message and exits. -Compile-time class fields. + =item B<-man> -=item re + Prints the manual page and exits. -Various pragmata to control behavior of regular expressions. + =back -=back + =head1 DESCRIPTION -=head2 Changes in existing modules + B<This program> will read the given input file(s) and do something + useful with the contents thereof. -=over + =cut -=item Benchmark +See L<Pod::Usage> for details. -You can now run tests for I<n> seconds instead of guessing the right -number of tests to run: e.g. timethese(-5, ...) will run each of the -codes for at least 5 CPU seconds. Zero as the "number of repetitions" -means "for at least 3 CPU seconds". The output format has also -changed. For example: +A bug that prevented the non-option call-back <> from being +specified as the first argument has been fixed. -use Benchmark;$x=3;timethese(-5,{a=>sub{$x*$x},b=>sub{$x**2}}) +To specify the characters < and > as option starters, use ><. Note, +however, that changing option starters is strongly deprecated. -will now output something like this: +=item IO -Benchmark: running a, b, each for at least 5 CPU seconds... - a: 5 wallclock secs ( 5.77 usr + 0.00 sys = 5.77 CPU) @ 200551.91/s (n=1156516) - b: 4 wallclock secs ( 5.00 usr + 0.02 sys = 5.02 CPU) @ 159605.18/s (n=800686) +write() and syswrite() will now accept a single-argument +form of the call, for consistency with Perl's syswrite(). -New features: "each for at least N CPU seconds...", "wallclock secs", -and the "@ operations/CPU second (n=operations)".
+You can now create a TCP-based IO::Socket::INET without forcing +a connect attempt. This allows you to configure its options +(like making it non-blocking) and then call connect() manually. -=item Carp +A bug that prevented the IO::Socket::protocol() accessor +from ever returning the correct value has been corrected. -Carp has a new function cluck(). cluck() warns, like carp(), but also adds -a stack backtrace to the error message, like confess(). +IO::Socket::connect now uses non-blocking IO instead of alarm() +to do connect timeouts. -=item CGI +IO::Socket::accept now uses select() instead of alarm() for doing +timeouts. -CGI has been updated to version 2.42. +IO::Socket::INET->new now sets $! correctly on failure. $@ is +still set for backwards compatibility. -=item Fcntl +=item JPL -More Fcntl constants added: F_SETLK64, F_SETLKW64, O_LARGEFILE for -large (more than 4G) file access (the 64-bit support is not yet -working, though, so no need to get overly excited), Free/Net/OpenBSD -locking behaviour flags F_FLOCK, F_POSIX, Linux F_SHLCK, and -O_ACCMODE: the mask of O_RDONLY, O_WRONLY, and O_RDWR. +Java Perl Lingo is now distributed with Perl. See jpl/README +for more information. + +=item lib + +C<use lib> now weeds out any trailing duplicate entries. +C<no lib> removes all named entries. + +=item Math::BigInt + +The bitwise operations C<<< << >>>, C<<< >> >>>, C<&>, C<|>, +and C<~> are now supported on bigints. =item Math::Complex -The accessor methods Re, Im, arg, abs, rho, and theta, can now also +The accessor methods Re, Im, arg, abs, rho, and theta can now also act as mutators (accessor $z->Re(), mutator $z->Re(3)). +The class method C<display_format> and the corresponding object method +C<display_format>, in addition to accepting just one argument, now can +also accept a parameter hash.
Recognized keys of a parameter hash are +C<"style">, which corresponds to the old one parameter case, and two +new parameters: C<"format">, which is a printf()-style format string +(defaults usually to C<"%.15g">, you can revert to the default by +setting the format string to C<undef>) used for both parts of a +complex number, and C<"polar_pretty_print"> (defaults to true), +which controls whether an attempt is made to try to recognize small +multiples and rationals of pi (2pi, pi/2) at the argument (angle) of a +polar complex number. + +The potentially disruptive change is that in list context both methods +now I<return the parameter hash>, instead of only the value of the +C<"style"> parameter. + =item Math::Trig -A little bit of radial trigonometry (cylindrical and spherical) added: -radial coordinate conversions and the great circle distance. +A little bit of radial trigonometry (cylindrical and spherical), +radial coordinate conversions, and the great circle distance were added. -=item POSIX +=item Pod::Parser, Pod::InputObjects -POSIX now has its own platform-specific hints files. +Pod::Parser is a base class for parsing and selecting sections of +pod documentation from an input stream. This module takes care of +identifying pod paragraphs and commands in the input and hands off the +parsed paragraphs and commands to user-defined methods which are free +to interpret or translate them as they see fit. -=item DB_File +Pod::InputObjects defines some input objects needed by Pod::Parser, and +for advanced users of Pod::Parser that need more about a command besides +its name and text. -DB_File supports version 2.x of Berkeley DB. See C<ext/DB_File/Changes>. +As of release 5.6.0 of Perl, Pod::Parser is now the officially sanctioned +"base parser code" recommended for use by all pod2xxx translators. +Pod::Text (pod2text) and Pod::Man (pod2man) have already been converted +to use Pod::Parser and efforts to convert Pod::HTML (pod2html) are already +underway. 
For any questions or comments about pod parsing and translating +issues and utilities, please use the pod-people@perl.org mailing list. -=item MakeMaker +For further information, please see L<Pod::Parser> and L<Pod::InputObjects>. -MakeMaker now supports writing empty makefiles, provides a way to -specify that site umask() policy should be honored. There is also -better support for manipulation of .packlist files, and getting -information about installed modules. +=item Pod::Checker, podchecker -Extensions that have both architecture-dependent and -architecture-independent files are now always installed completely in -the architecture-dependent locations. Previously, the shareable parts -were shared both across architectures and across perl versions and were -therefore liable to be overwritten with newer versions that might have -subtle incompatibilities. +This utility checks pod files for correct syntax, according to +L<perlpod>. Obvious errors are flagged as such, while warnings are +printed for mistakes that can be handled gracefully. The checklist is +not complete yet. See L<Pod::Checker>. -=item CPAN +=item Pod::ParseUtils, Pod::Find -See <perlmodinstall> and L<CPAN>. +These modules provide a set of gizmos that are useful mainly for pod +translators. L<Pod::Find|Pod::Find> traverses directory structures and +returns found pod files, along with their canonical names (like +C<File::Spec::Unix>). L<Pod::ParseUtils|Pod::ParseUtils> contains +B<Pod::List> (useful for storing pod list information), B<Pod::Hyperlink> +(for parsing the contents of C<LE<lt>E<gt>> sequences) and B<Pod::Cache> +(for caching information about pod files, e.g., link nodes). -=item Cwd +=item Pod::Select, podselect -Cwd::cwd is faster on most platforms. +Pod::Select is a subclass of Pod::Parser which provides a function +named "podselect()" to filter out user-specified sections of raw pod +documentation from an input stream. 
podselect is a script that provides +access to Pod::Select from other scripts to be used as a filter. +See L<Pod::Select>. -=item Benchmark +=item Pod::Usage, pod2usage + +Pod::Usage provides the function "pod2usage()" to print usage messages for +a Perl script based on its embedded pod documentation. The pod2usage() +function is generally useful to all script authors since it lets them +write and maintain a single source (the pods) for documentation, thus +removing the need to create and maintain redundant usage message text +consisting of information already in the pods. + +There is also a pod2usage script which can be used from other kinds of +scripts to print usage messages from pods (even for non-Perl scripts +with pods embedded in comments). + +For details and examples, please see L<Pod::Usage>. + +=item Pod::Text and Pod::Man + +Pod::Text has been rewritten to use Pod::Parser. While pod2text() is +still available for backwards compatibility, the module now has a new +preferred interface. See L<Pod::Text> for the details. The new Pod::Text +module is easily subclassed for tweaks to the output, and two such +subclasses (Pod::Text::Termcap for man-page-style bold and underlining +using termcap information, and Pod::Text::Color for markup with ANSI color +sequences) are now standard. + +pod2man has been turned into a module, Pod::Man, which also uses +Pod::Parser. In the process, several outstanding bugs related to quotes +in section headers, quoting of code escapes, and nested lists have been +fixed. pod2man is now a wrapper script around this module. + +=item SDBM_File + +An EXISTS method has been added to this module (and sdbm_exists() has +been added to the underlying sdbm library), so one can now call exists +on an SDBM_File tied hash and get the correct result, rather than a +runtime error. + +A bug that may have caused data loss when more than one disk block +happens to be read from the database in a single FETCH() has been +fixed. 
+ +=item Sys::Syslog -Keeps better time. +Sys::Syslog now uses XSUBs to access facilities from syslog.h so it +no longer requires syslog.ph to exist. + +=item Sys::Hostname + +Sys::Hostname now uses XSUBs to call the C library's gethostname() or +uname() if they exist. + +=item Term::ANSIColor + +Term::ANSIColor is a very simple module to provide easy and readable +access to the ANSI color and highlighting escape sequences, supported by +most ANSI terminal emulators. It is now included standard. + +=item Time::Local + +The timelocal() and timegm() functions used to silently return bogus +results when the date fell outside the machine's integer range. They +now consistently croak() if the date falls in an unsupported range. + +=item Win32 + +The error return value in list context has been changed for all functions +that return a list of values. Previously these functions returned a list +with a single element C<undef> if an error occurred. Now these functions +return the empty list in these situations. This applies to the following +functions: + + Win32::FsType + Win32::GetOSVersion + +The remaining functions are unchanged and continue to return C<undef> on +error even in list context. + +The Win32::SetLastError(ERROR) function has been added as a complement +to the Win32::GetLastError() function. + +The new Win32::GetFullPathName(FILENAME) returns the full absolute +pathname for FILENAME in scalar context. In list context it returns +a two-element list containing the fully qualified directory name and +the filename. See L<Win32>. + +=item XSLoader + +The XSLoader extension is a simpler alternative to DynaLoader. +See L<XSLoader>. + +=item DBM Filters + +A new feature called "DBM Filters" has been added to all the +DBM modules--DB_File, GDBM_File, NDBM_File, ODBM_File, and SDBM_File. 
+DBM Filters add four new methods to each DBM module: + + filter_store_key + filter_store_value + filter_fetch_key + filter_fetch_value + +These can be used to filter key-value pairs before the pairs are +written to the database or just after they are read from the database. +See L<perldbmfilter> for further information. =back +=head2 Pragmata + +C<use attrs> is now obsolete, and is only provided for +backward-compatibility. It's been replaced by the C<sub : attributes> +syntax. See L<perlsub/"Subroutine Attributes"> and L<attributes>. + +Lexical warnings pragma, C<use warnings;>, to control optional warnings. +See L<perllexwarn>. + +C<use filetest> to control the behaviour of filetests (C<-r> C<-w> +...). Currently only one subpragma implemented, "use filetest +'access';", that uses access(2) or equivalent to check permissions +instead of using stat(2) as usual. This matters in filesystems +where there are ACLs (access control lists): the stat(2) might lie, +but access(2) knows better. + +The C<open> pragma can be used to specify default disciplines for +handle constructors (e.g. open()) and for qx//. The two +pseudo-disciplines C<:raw> and C<:crlf> are currently supported on +DOS-derivative platforms (i.e. where binmode is not a no-op). +See also L</"binmode() can be used to set :crlf and :raw modes">. + =head1 Utility Changes -C<h2ph> and related utilities have been vastly overhauled. +=head2 dprofpp -C<perlcc>, a new experimental front end for the compiler is available. +C<dprofpp> is used to display profile data generated using C<Devel::DProf>. +See L<dprofpp>. -The crude GNU C<configure> emulator is now called C<configure.gnu> to -avoid trampling on C<Configure> under case-insensitive filesystems. +=head2 find2perl -C<perldoc> used to be rather slow. The slower features are now optional. -In particular, case-insensitive searches need the C<-i> switch, and -recursive searches need C<-r>. 
You can set these switches in the -C<PERLDOC> environment variable to get the old behavior. +The C<find2perl> utility now uses the enhanced features of the File::Find +module. The -depth and -follow options are supported. Pod documentation +is also included in the script. -=head1 Documentation Changes +=head2 h2xs -Config.pm now has a glossary of variables. +The C<h2xs> tool can now work in conjunction with C<C::Scan> (available +from CPAN) to automatically parse real-life header files. The C<-M>, +C<-a>, C<-k>, and C<-o> options are new. -F<Porting/patching.pod> has detailed instructions on how to create and -submit patches for perl. +=head2 perlcc -L<perlport> specifies guidelines on how to write portably. +C<perlcc> now supports the C and Bytecode backends. By default, +it generates output from the simple C backend rather than the +optimized C backend. -L<perlmodinstall> describes how to fetch and install modules from C<CPAN> -sites. +Support for non-Unix platforms has been improved. -Some more Perl traps are documented now. See L<perltrap>. +=head2 perldoc -L<perlopentut> gives a tutorial on using open(). +C<perldoc> has been reworked to avoid possible security holes. +It will not by default let itself be run as the superuser, but you +may still use the B<-U> switch to try to make it drop privileges +first. -L<perlreftut> gives a tutorial on references. +=head2 The Perl Debugger -L<perlthrtut> gives a tutorial on threads. +Many bug fixes and enhancements were added to F<perl5db.pl>, the +Perl debugger. The help documentation was rearranged. New commands +include C<< < ? >>, C<< > ? >>, and C<< { ? >> to list out current +actions, C<man I<docpage>> to run your doc viewer on some perl +docset, and support for quoted options. The help information was +rearranged, and should be viewable once again if you're using B<less> +as your pager. 
A serious security hole was plugged--you should +immediately remove all older versions of the Perl debugger as +installed in previous releases, all the way back to perl3, from +your system to avoid being bitten by this. -=head1 New Diagnostics +=head1 Improved Documentation -=over +Many of the platform-specific README files are now part of the perl +installation. See L<perl> for the complete list. + +=over 4 + +=item perlapi.pod + +The official list of public Perl API functions. + +=item perlboot.pod + +A tutorial for beginners on object-oriented Perl. + +=item perlcompile.pod + +An introduction to using the Perl Compiler suite. + +=item perldbmfilter.pod + +A howto document on using the DBM filter facility. + +=item perldebug.pod + +All material unrelated to running the Perl debugger, plus all +low-level guts-like details that risked crushing the casual user +of the debugger, have been relocated from the old manpage to the +next entry below. + +=item perldebguts.pod + +This new manpage contains excessively low-level material not related +to the Perl debugger, but slightly related to debugging Perl itself. +It also contains some arcane internal details of how the debugging +process works that may only be of interest to developers of Perl +debuggers. + +=item perlfork.pod + +Notes on the fork() emulation currently available for the Windows platform. + +=item perlfilter.pod + +An introduction to writing Perl source filters. + +=item perlhack.pod -=item Ambiguous call resolved as CORE::%s(), qualify as such or use & +Some guidelines for hacking the Perl source code. -(W) A subroutine you have declared has the same name as a Perl keyword, -and you have used the name without qualification for calling one or the -other. Perl decided to call the builtin because the subroutine is -not imported. +=item perlintern.pod -To force interpretation as a subroutine call, either put an ampersand -before the subroutine name, or qualify the name with its package. 
-Alternatively, you can import the subroutine (or pretend that it's -imported with the C<use subs> pragma). +A list of internal functions in the Perl source code. +(List is currently empty.) -To silently interpret it as the Perl operator, use the C<CORE::> prefix -on the operator (e.g. C<CORE::log($x)>) or by declaring the subroutine -to be an object method (see L<attrs>). +=item perllexwarn.pod -=item Bad index while coercing array into hash +Introduction and reference information about lexically scoped +warning categories. -(F) The index looked up in the hash found as the 0'th element of a -pseudo-hash is not legal. Index values must be at 1 or greater. -See L<perlref>. +=item perlnumber.pod -=item Bareword "%s" refers to nonexistent package +Detailed information about numbers as they are represented in Perl. -(W) You used a qualified bareword of the form C<Foo::>, but -the compiler saw no other uses of that namespace before that point. -Perhaps you need to predeclare a package? +=item perlopentut.pod -=item Can't call method "%s" on an undefined value +A tutorial on using open() effectively. -(F) You used the syntax of a method call, but the slot filled by the -object reference or package name contains an undefined value. -Something like this will reproduce the error: +=item perlreftut.pod + +A tutorial that introduces the essentials of references. + +=item perltootc.pod + +A tutorial on managing class data for object modules. + +=item perltodo.pod + +Discussion of the most often wanted features that may someday be +supported in Perl. + +=item perlunicode.pod + +An introduction to Unicode support features in Perl. + +=back + +=head1 Performance enhancements + +=head2 Simple sort() using { $a <=> $b } and the like are optimized + +Many common sort() operations using a simple inlined block are now +optimized for faster performance. 
+ +=head2 Optimized assignments to lexical variables + +Certain operations in the RHS of assignment statements have been +optimized to directly set the lexical variable on the LHS, +eliminating redundant copying overheads. + +=head2 Faster subroutine calls + +Minor changes in how subroutine calls are handled internally +provide marginal improvements in performance. + +=head2 delete(), each(), values() and hash iteration are faster + +The hash values returned by delete(), each(), values() and hashes in a +list context are the actual values in the hash, instead of copies. +This results in significantly better performance, because it eliminates +needless copying in most situations. + +=head1 Installation and Configuration Improvements + +=head2 -Dusethreads means something different + +The -Dusethreads flag now enables the experimental interpreter-based thread +support by default. To get the flavor of experimental threads that was in +5.005 instead, you need to run Configure with "-Dusethreads -Duse5005threads". + +As of v5.6.0, interpreter-threads support is still lacking a way to +create new threads from Perl (i.e., C<use Thread;> will not work with +interpreter threads). C<use Thread;> continues to be available when you +specify the -Duse5005threads option to Configure, bugs and all. + + NOTE: Support for threads continues to be an experimental feature. + Interfaces and implementation are subject to sudden and drastic changes. + +=head2 New Configure flags + +The following new flags may be enabled on the Configure command line +by running Configure with C<-Dflag>. 
+ + usemultiplicity + usethreads useithreads (new interpreter threads: no Perl API yet) + usethreads use5005threads (threads as they were in 5.005) + + use64bitint (equal to now deprecated 'use64bits') + use64bitall + + uselongdouble + usemorebits + uselargefiles + usesocks (only SOCKS v5 supported) + +=head2 Threadedness and 64-bitness now more daring + +The Configure options enabling the use of threads and the use of +64-bitness are now more daring in the sense that they no more have an +explicit list of operating systems of known threads/64-bit +capabilities. In other words: if your operating system has the +necessary APIs and datatypes, you should be able just to go ahead and +use them, for threads by Configure -Dusethreads, and for 64 bits +either explicitly by Configure -Duse64bitint or implicitly if your +system has 64-bit wide datatypes. See also L<"64-bit support">. + +=head2 Long Doubles + +Some platforms have "long doubles", floating point numbers of even +larger range than ordinary "doubles". To enable using long doubles for +Perl's scalars, use -Duselongdouble. + +=head2 -Dusemorebits + +You can enable both -Duse64bitint and -Duselongdouble with -Dusemorebits. +See also L<"64-bit support">. + +=head2 -Duselargefiles + +Some platforms support system APIs that are capable of handling large files +(typically, files larger than two gigabytes). Perl will try to use these +APIs if you ask for -Duselargefiles. + +See L<"Large file support"> for more information. + +=head2 installusrbinperl + +You can use "Configure -Uinstallusrbinperl" which causes installperl +to skip installing perl also as /usr/bin/perl. This is useful if you +prefer not to modify /usr/bin for some reason or another but harmful +because many scripts assume to find Perl in /usr/bin/perl. + +=head2 SOCKS support + +You can use "Configure -Dusesocks" which causes Perl to probe +for the SOCKS proxy protocol library (v5, not v4). 
For more information +on SOCKS, see: + + http://www.socks.nec.com/ + +=head2 C<-A> flag + +You can "post-edit" the Configure variables using the Configure C<-A> +switch. The editing happens immediately after the platform specific +hints files have been processed but before the actual configuration +process starts. Run C<Configure -h> to find out the full C<-A> syntax. + +=head2 Enhanced Installation Directories + +The installation structure has been enriched to improve the support +for maintaining multiple versions of perl, to provide locations for +vendor-supplied modules, scripts, and manpages, and to ease maintenance +of locally-added modules, scripts, and manpages. See the section on +Installation Directories in the INSTALL file for complete details. +For most users building and installing from source, the defaults should +be fine. + +If you previously used C<Configure -Dsitelib> or C<-Dsitearch> to set +special values for library directories, you might wish to consider using +the new C<-Dsiteprefix> setting instead. Also, if you wish to re-use a +config.sh file from an earlier version of perl, you should be sure to +check that Configure makes sensible choices for the new directories. +See INSTALL for complete details. + +=head1 Platform specific changes + +=head2 Supported platforms + +=over 4 + +=item * + +VM/ESA is now supported. + +=item * + +Siemens BS2000 is now supported under the POSIX Shell. + +=item * + +The Mach CThreads (NEXTSTEP, OPENSTEP) are now supported by the Thread +extension. + +=item * + +GNU/Hurd is now supported. + +=item * + +Rhapsody/Darwin is now supported. + +=item * + +EPOC is now supported (on Psion 5). + +=item * + +The cygwin port (formerly cygwin32) has been greatly improved. + +=back + +=head2 DOS + +=over 4 + +=item * + +Perl now works with djgpp 2.02 (and 2.03 alpha). + +=item * + +Environment variable names are not converted to uppercase any more. + +=item * + +Incorrect exit codes from backticks have been fixed. 
+ +=item * + +This port continues to use its own builtin globbing (not File::Glob). + +=back + +=head2 OS390 (OpenEdition MVS) + +Support for this EBCDIC platform has not been renewed in this release. +There are difficulties in reconciling Perl's standardization on UTF-8 +as its internal representation for characters with the EBCDIC character +set, because the two are incompatible. + +It is unclear whether future versions will renew support for this +platform, but the possibility exists. + +=head2 VMS + +Numerous revisions and extensions to configuration, build, testing, and +installation process to accommodate core changes and VMS-specific options. + +Expand %ENV-handling code to allow runtime mapping to logical names, +CLI symbols, and CRTL environ array. + +Extension of subprocess invocation code to accept filespecs as command +"verbs". + +Add to Perl command line processing the ability to use default file types and +to recognize Unix-style C<2E<gt>&1>. + +Expansion of File::Spec::VMS routines, and integration into ExtUtils::MM_VMS. + +Extension of ExtUtils::MM_VMS to handle complex extensions more flexibly. + +Barewords at start of Unix-syntax paths may be treated as text rather than +only as logical names. + +Optional secure translation of several logical names used internally by Perl. + +Miscellaneous bugfixing and porting of new core code to VMS. + +Thanks are gladly extended to the many people who have contributed VMS +patches, testing, and ideas. + +=head2 Win32 + +Perl can now emulate fork() internally, using multiple interpreters running +in different concurrent threads. This support must be enabled at build +time. See L<perlfork> for detailed information. + +When given a pathname that consists only of a drivename, such as C<A:>, +opendir() and stat() now use the current working directory for the drive +rather than the drive root. + +The builtin XSUB functions in the Win32:: namespace are documented. See +L<Win32>. 
+ +$^X now contains the full path name of the running executable. + +A Win32::GetLongPathName() function is provided to complement +Win32::GetFullPathName() and Win32::GetShortPathName(). See L<Win32>. + +POSIX::uname() is supported. + +system(1,...) now returns true process IDs rather than process +handles. kill() accepts any real process id, rather than strictly +return values from system(1,...). + +For better compatibility with Unix, C<kill(0, $pid)> can now be used to +test whether a process exists. + +The C<Shell> module is supported. + +Better support for building Perl under command.com in Windows 95 +has been added. + +Scripts are read in binary mode by default to allow ByteLoader (and +the filter mechanism in general) to work properly. For compatibility, +the DATA filehandle will be set to text mode if a carriage return is +detected at the end of the line containing the __END__ or __DATA__ +token; if not, the DATA filehandle will be left open in binary mode. +Earlier versions always opened the DATA filehandle in text mode. + +The glob() operator is implemented via the C<File::Glob> extension, +which supports glob syntax of the C shell. This increases the flexibility +of the glob() operator, but there may be compatibility issues for +programs that relied on the older globbing syntax. If you want to +preserve compatibility with the older syntax, you might want to run +perl with C<-MFile::DosGlob>. For details and compatibility information, +see L<File::Glob>. + +=head1 Significant bug fixes + +=head2 <HANDLE> on empty files + +With C<$/> set to C<undef>, "slurping" an empty file returns a string of +zero length (instead of C<undef>, as it used to) the first time the +HANDLE is read after C<$/> is set to C<undef>. Further reads yield +C<undef>. 
+ +This means that the following will append "foo" to an empty file (it used +to do nothing): + + perl -0777 -pi -e 's/^/foo/' empty_file + +The behaviour of: + + perl -pi -e 's/^/foo/' empty_file + +is unchanged (it continues to leave the file empty). + +=head2 C<eval '...'> improvements + +Line numbers (as reflected by caller() and most diagnostics) within +C<eval '...'> were often incorrect where here documents were involved. +This has been corrected. + +Lexical lookups for variables appearing in C<eval '...'> within +functions that were themselves called within an C<eval '...'> were +searching the wrong place for lexicals. The lexical search now +correctly ends at the subroutine's block boundary. + +The use of C<return> within C<eval {...}> caused $@ not to be reset +correctly when no exception occurred within the eval. This has +been fixed. + +Parsing of here documents used to be flawed when they appeared as +the replacement expression in C<eval 's/.../.../e'>. This has +been fixed. + +=head2 All compilation errors are true errors + +Some "errors" encountered at compile time were by necessity +generated as warnings followed by eventual termination of the +program. This enabled more such errors to be reported in a +single run, rather than causing a hard stop at the first error +that was encountered. + +The mechanism for reporting such errors has been reimplemented +to queue compile-time errors and report them at the end of the +compilation as true errors rather than as warnings. This fixes +cases where error messages leaked through in the form of warnings +when code was compiled at run time using C<eval STRING>, and +also allows such errors to be reliably trapped using C<eval "...">. + +=head2 Implicitly closed filehandles are safer + +Sometimes implicitly closed filehandles (as when they are localized, +and Perl automatically closes them on exiting the scope) could +inadvertently set $? or $!. This has been corrected. 
+ + +=head2 Behavior of list slices is more consistent + +When taking a slice of a literal list (as opposed to a slice of +an array or hash), Perl used to return an empty list if the +result happened to be composed of all undef values. + +The new behavior is to produce an empty list if (and only if) +the original list was empty. Consider the following example: + + @a = (1,undef,undef,2)[2,1,2]; + +The old behavior would have resulted in @a having no elements. +The new behavior ensures it has three undefined elements. + +Note in particular that the behavior of slices of the following +cases remains unchanged: + + @a = ()[1,2]; + @a = (getpwent)[7,0]; + @a = (anything_returning_empty_list())[2,1,2]; + @a = @b[2,1,2]; + @a = @c{'a','b','c'}; + +See L<perldata>. + +=head2 C<(\$)> prototype and C<$foo{a}> + +A scalar reference prototype now correctly allows a hash or +array element in that slot. + +=head2 C<goto &sub> and AUTOLOAD + +The C<goto &sub> construct works correctly when C<&sub> happens +to be autoloaded. + +=head2 C<-bareword> allowed under C<use integer> + +The autoquoting of barewords preceded by C<-> did not work +in prior versions when the C<integer> pragma was enabled. +This has been fixed. + +=head2 Failures in DESTROY() + +When code in a destructor threw an exception, it went unnoticed +in earlier versions of Perl, unless someone happened to be +looking in $@ just after the point the destructor happened to +run. Such failures are now visible as warnings when warnings are +enabled. + +=head2 Locale bugs fixed + +printf() and sprintf() previously reset the numeric locale +back to the default "C" locale. This has been fixed. + +Numbers formatted according to the local numeric locale +(such as using a decimal comma instead of a decimal dot) caused +"isn't numeric" warnings, even while the operations accessing +those numbers produced correct results. These warnings have been +discontinued. 
+ +=head2 Memory leaks + +The C<eval 'return sub {...}'> construct could sometimes leak +memory. This has been fixed. + +Operations that aren't filehandle constructors used to leak memory +when used on invalid filehandles. This has been fixed. + +Constructs that modified C<@_> could fail to deallocate values +in C<@_> and thus leak memory. This has been corrected. + +=head2 Spurious subroutine stubs after failed subroutine calls + +Perl could sometimes create empty subroutine stubs when a +subroutine was not found in the package. Such cases stopped +later method lookups from progressing into base packages. +This has been corrected. + +=head2 Taint failures under C<-U> + +When running in unsafe mode, taint violations could sometimes +cause silent failures. This has been fixed. + +=head2 END blocks and the C<-c> switch + +Prior versions used to run BEGIN B<and> END blocks when Perl was +run in compile-only mode. Since this is typically not the expected +behavior, END blocks are not executed anymore when the C<-c> switch +is used. + +See L<CHECK blocks> for how to run things when the compile phase ends. + +=head2 Potential to leak DATA filehandles + +Using the C<__DATA__> token creates an implicit filehandle to +the file that contains the token. It is the program's +responsibility to close it when it is done reading from it. + +This caveat is now better explained in the documentation. +See L<perldata>. + +=head1 New or Changed Diagnostics + +=over 4 - $BADREF = 42; - process $BADREF 1,2,3; - $BADREF->process(1,2,3); +=item "%s" variable %s masks earlier declaration in same %s -=item Can't check filesystem of script "%s" for nosuid +(W misc) A "my" or "our" variable has been redeclared in the current scope or statement, +effectively eliminating all access to the previous instance. This is almost +always a typographical error. Note that the earlier variable will still exist +until the end of the scope or until all closure referents to it are +destroyed. 
+ +=item "my sub" not yet implemented + +(F) Lexically scoped subroutines are not yet implemented. Don't try that +yet. + +=item "our" variable %s redeclared + +(W misc) You seem to have already declared the same global once before in the +current lexical scope. + +=item '!' allowed only after types %s + +(F) The '!' is allowed in pack() and unpack() only after certain types. +See L<perlfunc/pack>. + +=item / cannot take a count + +(F) You had an unpack template indicating a counted-length string, +but you have also specified an explicit size for the string. +See L<perlfunc/pack>. + +=item / must be followed by a, A or Z + +(F) You had an unpack template indicating a counted-length string, +which must be followed by one of the letters a, A or Z +to indicate what sort of string is to be unpacked. +See L<perlfunc/pack>. + +=item / must be followed by a*, A* or Z* + +(F) You had a pack template indicating a counted-length string. +Currently the only things that can have their length counted are a*, A* or Z*. +See L<perlfunc/pack>. + +=item / must follow a numeric type + +(F) You had an unpack template that contained a '/', +but this did not follow some numeric unpack specification. +See L<perlfunc/pack>. + +=item /%s/: Unrecognized escape \\%c passed through + +(W regexp) You used a backslash-character combination which is not recognized +by Perl. This combination appears in an interpolated variable or a +C<'>-delimited regular expression. The character was understood literally. + +=item /%s/: Unrecognized escape \\%c in character class passed through + +(W regexp) You used a backslash-character combination which is not recognized +by Perl inside character classes. The character was understood literally. + +=item /%s/ should probably be written as "%s" + +(W syntax) You have used a pattern where Perl expected to find a string, +as in the first argument to C<join>. 
Perl will treat the true +or false result of matching the pattern against $_ as the string, +which is probably not what you had in mind. + +=item %s() called too early to check prototype + +(W prototype) You've called a function that has a prototype before the parser saw a +definition or declaration for it, and Perl could not check that the call +conforms to the prototype. You need to either add an early prototype +declaration for the subroutine in question, or move the subroutine +definition ahead of the call to get proper prototype checking. Alternatively, +if you are certain that you're calling the function correctly, you may put +an ampersand before the name to avoid the warning. See L<perlsub>. + +=item %s argument is not a HASH or ARRAY element + +(F) The argument to exists() must be a hash or array element, such as: + + $foo{$bar} + $ref->{"susie"}[12] + +=item %s argument is not a HASH or ARRAY element or slice + +(F) The argument to delete() must be either a hash or array element, such as: + + $foo{$bar} + $ref->{"susie"}[12] + +or a hash or array slice, such as: + + @foo[$bar, $baz, $xyzzy] + @{$ref->[12]}{"susie", "queue"} + +=item %s argument is not a subroutine name + +(F) The argument to exists() for C<exists &sub> must be a subroutine +name, and not a subroutine call. C<exists &sub()> will generate this error. + +=item %s package attribute may clash with future reserved word: %s + +(W reserved) A lowercase attribute name was used that had a package-specific handler. +That name might have a meaning to Perl itself some day, even though it +doesn't yet. Perhaps you should use a mixed-case attribute name, instead. +See L<attributes>. + +=item (in cleanup) %s + +(W misc) This prefix usually indicates that a DESTROY() method raised +the indicated exception. 
Since destructors are usually called by +the system at arbitrary points during execution, and often a vast +number of times, the warning is issued only once for any number +of failures that would otherwise result in the same message being +repeated. + +Failure of user callbacks dispatched using the C<G_KEEPERR> flag +could also result in this warning. See L<perlcall/G_KEEPERR>. + +=item <> should be quotes + +(F) You wrote C<< require <file> >> when you should have written +C<require 'file'>. + +=item Attempt to join self + +(F) You tried to join a thread from within itself, which is an +impossible task. You may be joining the wrong thread, or you may +need to move the join() to some other thread. + +=item Bad evalled substitution pattern + +(F) You've used the /e switch to evaluate the replacement for a +substitution, but perl found a syntax error in the code to evaluate, +most likely an unexpected right brace '}'. + +=item Bad realloc() ignored + +(S) An internal routine called realloc() on something that had never been +malloc()ed in the first place. Mandatory, but can be disabled by +setting environment variable C<PERL_BADFREE> to 1. + +=item Bareword found in conditional + +(W bareword) The compiler found a bareword where it expected a conditional, +which often indicates that an || or && was parsed as part of the +last argument of the previous construct, for example: + + open FOO || die; + +It may also indicate a misspelled constant that has been interpreted +as a bareword: + + use constant TYPO => 1; + if (TYOP) { print "foo" } + +The C<strict> pragma is useful in avoiding such errors. + +=item Binary number > 0b11111111111111111111111111111111 non-portable + +(W portable) The binary number you specified is larger than 2**32-1 +(4294967295) and therefore non-portable between systems. See +L<perlport> for more on portability concerns. + +=item Bit vector size > 32 non-portable + +(W portable) Using bit vector sizes larger than 32 is non-portable. 
+ +=item Buffer overflow in prime_env_iter: %s + +(W internal) A warning peculiar to VMS. While Perl was preparing to iterate over +%ENV, it encountered a logical name or symbol definition which was too long, +so it was truncated to the string shown. + +=item Can't check filesystem of script "%s" (P) For some reason you can't check the filesystem of the script for nosuid. -=item Can't coerce array into hash +=item Can't declare class for non-scalar %s in "%s" -(F) You used an array where a hash was expected, but the array has no -information on how to map from keys to array indices. You can do that -only with arrays that have a hash reference at index 0. +(S) Currently, only scalar variables can be declared with a specific class +qualifier in a "my" or "our" declaration. The semantics may be extended +for other types of variables in future. -=item Can't goto subroutine from an eval-string +=item Can't declare %s in "%s" -(F) The "goto subroutine" call can't be used to jump out of an eval "string". -(You can use it to jump out of an eval {BLOCK}, but you probably don't want to.) +(F) Only scalar, array, and hash variables may be declared as "my" or +"our" variables. They must have ordinary identifiers as names. -=item Can't localize pseudo-hash element +=item Can't ignore signal CHLD, forcing to default -(F) You said something like C<local $ar-E<gt>{'key'}>, where $ar is -a reference to a pseudo-hash. That hasn't been implemented yet, but -you can get a similar effect by localizing the corresponding array -element directly -- C<local $ar-E<gt>[$ar-E<gt>[0]{'key'}]>. +(W signal) Perl has detected that it is being run with the SIGCHLD signal +(sometimes known as SIGCLD) disabled. Since disabling this signal +will interfere with proper determination of exit status of child +processes, Perl has reset the signal to its default value. +This situation typically indicates that the parent program under +which Perl may be running (e.g., cron) is being very careless. 
-=item Can't use %%! because Errno.pm is not available +=item Can't modify non-lvalue subroutine call -(F) The first time the %! hash is used, perl automatically loads the -Errno.pm module. The Errno module is expected to tie the %! hash to -provide symbolic names for C<$!> errno values. +(F) Subroutines meant to be used in lvalue context should be declared as +such, see L<perlsub/"Lvalue subroutines">. -=item Cannot find an opnumber for "%s" +=item Can't read CRTL environ -(F) A string of a form C<CORE::word> was given to prototype(), but -there is no builtin with the name C<word>. +(S) A warning peculiar to VMS. Perl tried to read an element of %ENV +from the CRTL's internal environment array and discovered the array was +missing. You need to figure out where your CRTL misplaced its environ +or define F<PERL_ENV_TABLES> (see L<perlvms>) so that environ is not searched. -=item Character class syntax [. .] is reserved for future extensions +=item Can't remove %s: %s, skipping file -(W) Within regular expression character classes ([]) the syntax beginning -with "[." and ending with ".]" is reserved for future extensions. -If you need to represent those character sequences inside a regular -expression character class, just quote the square brackets with the -backslash: "\[." and ".\]". +(S) You requested an inplace edit without creating a backup file. Perl +was unable to remove the original file to replace it with the modified +file. The file was left unmodified. -=item Character class syntax [: :] is reserved for future extensions +=item Can't return %s from lvalue subroutine -(W) Within regular expression character classes ([]) the syntax beginning -with "[:" and ending with ":]" is reserved for future extensions. -If you need to represent those character sequences inside a regular -expression character class, just quote the square brackets with the -backslash: "\[:" and ":\]". 
+(F) Perl detected an attempt to return illegal lvalues (such +as temporary or readonly values) from a subroutine used as an lvalue. +This is not allowed. -=item Character class syntax [= =] is reserved for future extensions +=item Can't weaken a nonreference -(W) Within regular expression character classes ([]) the syntax -beginning with "[=" and ending with "=]" is reserved for future extensions. -If you need to represent those character sequences inside a regular -expression character class, just quote the square brackets with the -backslash: "\[=" and "=\]". +(F) You attempted to weaken something that was not a reference. Only +references can be weakened. + +=item Character class [:%s:] unknown + +(F) The class in the character class [: :] syntax is unknown. +See L<perlre>. + +=item Character class syntax [%s] belongs inside character classes + +(W unsafe) The character class constructs [: :], [= =], and [. .] go +I<inside> character classes, the [] are part of the construct, +for example: /[012[:alpha:]345]/. Note that [= =] and [. .] +are not currently implemented; they are simply placeholders for +future extensions. + +=item Constant is not %s reference + +(F) A constant value (perhaps declared using the C<use constant> pragma) +is being dereferenced, but it amounts to the wrong type of reference. The +message indicates the type of reference that was expected. This usually +indicates a syntax error in dereferencing the constant value. +See L<perlsub/"Constant Functions"> and L<constant>. + +=item constant(%s): %s + +(F) The parser found inconsistencies either while attempting to define an +overloaded constant, or when trying to find the character name specified +in the C<\N{...}> escape. Perhaps you forgot to load the corresponding +C<overload> or C<charnames> pragma? See L<charnames> and L<overload>. + +=item CORE::%s is not a keyword + +(F) The CORE:: namespace is reserved for Perl keywords. 
+ +=item defined(@array) is deprecated + +(D) defined() is not usually useful on arrays because it checks for an +undefined I<scalar> value. If you want to see if the array is empty, +just use C<if (@array) { # not empty }> for example. + +=item defined(%hash) is deprecated + +(D) defined() is not usually useful on hashes because it checks for an +undefined I<scalar> value. If you want to see if the hash is empty, +just use C<if (%hash) { # not empty }> for example. + +=item Did not produce a valid header + +See Server error. + +=item (Did you mean "local" instead of "our"?) + +(W misc) Remember that "our" does not localize the declared global variable. +You have declared it again in the same lexical scope, which seems superfluous. + +=item Document contains no data + +See Server error. + +=item entering effective %s failed + +(F) While under the C<use filetest> pragma, switching the real and +effective uids or gids failed. + +=item false [] range "%s" in regexp + +(W regexp) A character class range must start and end at a literal character, not +another character class like C<\d> or C<[:alpha:]>. The "-" in your false +range is interpreted as a literal "-". Consider quoting the "-", "\-". +See L<perlre>. + +=item Filehandle %s opened only for output + +(W io) You tried to read from a filehandle opened only for writing. If you +intended it to be a read/write filehandle, you needed to open it with +"+<" or "+>" or "+>>" instead of with "<" or nothing. If +you intended only to read from the file, use "<". See +L<perlfunc/open>. + +=item flock() on closed filehandle %s + +(W closed) The filehandle you're attempting to flock() got itself closed some +time before now. Check your logic flow. flock() operates on filehandles. +Are you attempting to call flock() on a dirhandle by the same name? 
+ +=item Global symbol "%s" requires explicit package name + +(F) You've said "use strict vars", which indicates that all variables +must either be lexically scoped (using "my"), declared beforehand using +"our", or explicitly qualified to say which package the global variable +is in (using "::"). + +=item Hexadecimal number > 0xffffffff non-portable + +(W portable) The hexadecimal number you specified is larger than 2**32-1 +(4294967295) and therefore non-portable between systems. See +L<perlport> for more on portability concerns. + +=item Ill-formed CRTL environ value "%s" + +(W internal) A warning peculiar to VMS. Perl tried to read the CRTL's internal +environ array, and encountered an element without the C<=> delimiter +used to separate keys from values. The element is ignored. + +=item Ill-formed message in prime_env_iter: |%s| + +(W internal) A warning peculiar to VMS. Perl tried to read a logical name +or CLI symbol definition when preparing to iterate over %ENV, and +didn't see the expected delimiter between key and value, so the +line was ignored. + +=item Illegal binary digit %s + +(F) You used a digit other than 0 or 1 in a binary number. + +=item Illegal binary digit %s ignored + +(W digit) You may have tried to use a digit other than 0 or 1 in a binary number. +Interpretation of the binary number stopped before the offending digit. + +=item Illegal number of bits in vec + +(F) The number of bits in vec() (the third argument) must be a power of +two from 1 to 32 (or 64, if your platform supports that). + +=item Integer overflow in %s number + +(W overflow) The hexadecimal, octal or binary number you have specified either +as a literal or as an argument to hex() or oct() is too big for your +architecture, and has been converted to a floating point number. On a +32-bit architecture the largest hexadecimal, octal or binary number +representable without overflow is 0xFFFFFFFF, 037777777777, or +0b11111111111111111111111111111111 respectively.
Note that Perl +transparently promotes all numbers to a floating point representation +internally--subject to loss of precision errors in subsequent +operations. + +=item Invalid %s attribute: %s + +The indicated attribute for a subroutine or variable was not recognized +by Perl or by a user-supplied handler. See L<attributes>. + +=item Invalid %s attributes: %s + +The indicated attributes for a subroutine or variable were not recognized +by Perl or by a user-supplied handler. See L<attributes>. + +=item invalid [] range "%s" in regexp + +The offending range is now explicitly displayed. + +=item Invalid separator character %s in attribute list + +(F) Something other than a colon or whitespace was seen between the +elements of an attribute list. If the previous attribute +had a parenthesised parameter list, perhaps that list was terminated +too soon. See L<attributes>. + +=item Invalid separator character %s in subroutine attribute list + +(F) Something other than a colon or whitespace was seen between the +elements of a subroutine attribute list. If the previous attribute +had a parenthesised parameter list, perhaps that list was terminated +too soon. + +=item leaving effective %s failed + +(F) While under the C<use filetest> pragma, switching the real and +effective uids or gids failed. + +=item Lvalue subs returning %s not implemented yet + +(F) Due to limitations in the current implementation, array and hash +values cannot be returned in subroutines used in lvalue context. +See L<perlsub/"Lvalue subroutines">. + +=item Method %s not permitted + +See Server error. + +=item Missing %sbrace%s on \N{} + +(F) Wrong syntax of character name literal C<\N{charname}> within +double-quotish context. + +=item Missing command in piped open + +(W pipe) You used the C<open(FH, "| command")> or C<open(FH, "command |")> +construction, but the command was missing or blank. 
+ +=item Missing name in "my sub" + +(F) The reserved syntax for lexically scoped subroutines requires that they +have a name with which they can be found. + +=item No %s specified for -%c + +(F) The indicated command line switch needs a mandatory argument, but +you haven't specified one. + +=item No package name allowed for variable %s in "our" + +(F) Fully qualified variable names are not allowed in "our" declarations, +because that doesn't make much sense under existing semantics. Such +syntax is reserved for future extensions. + +=item No space allowed after -%c + +(F) The argument to the indicated command line switch must follow immediately +after the switch, without intervening spaces. + +=item no UTC offset information; assuming local time is UTC + +(S) A warning peculiar to VMS. Perl was unable to find the local +timezone offset, so it's assuming that local system time is equivalent +to UTC. If it's not, define the logical name F<SYS$TIMEZONE_DIFFERENTIAL> +to translate to the number of seconds which need to be added to UTC to +get local time. + +=item Octal number > 037777777777 non-portable + +(W portable) The octal number you specified is larger than 2**32-1 (4294967295) +and therefore non-portable between systems. See L<perlport> for more +on portability concerns. + +See also L<perlport> for writing portable code. + +=item panic: del_backref + +(P) Failed an internal consistency check while trying to reset a weak +reference. + +=item panic: kid popen errno read + +(F) forked child returned an incomprehensible message about its errno. + +=item panic: magic_killbackrefs + +(P) Failed an internal consistency check while trying to reset all weak +references to an object. + +=item Parentheses missing around "%s" list + +(W parenthesis) You said something like + + my $foo, $bar = @_; + +when you meant + + my ($foo, $bar) = @_; + +Remember that "my", "our", and "local" bind tighter than comma. 
+ +=item Possible Y2K bug: %s + +(W y2k) You are concatenating the number 19 with another number, which +could be a potential Year 2000 problem. + +=item pragma "attrs" is deprecated, use "sub NAME : ATTRS" instead + +(W deprecated) You have written something like this: + + sub doit + { + use attrs qw(locked); + } + +You should use the new declaration syntax instead. + + sub doit : locked + { + ... + +The C<use attrs> pragma is now obsolete, and is only provided for +backward-compatibility. See L<perlsub/"Subroutine Attributes">. + + +=item Premature end of script headers + +See Server error. + +=item Repeat count in pack overflows + +(F) You can't specify a repeat count so large that it overflows +your signed integers. See L<perlfunc/pack>. + +=item Repeat count in unpack overflows -=item %s: Eval-group in insecure regular expression +(F) You can't specify a repeat count so large that it overflows +your signed integers. See L<perlfunc/unpack>. -(F) Perl detected tainted data when trying to compile a regular expression -that contains the C<(?{ ... })> zero-width assertion, which is unsafe. -See L<perlre/(?{ code })>, and L<perlsec>. +=item realloc() of freed memory ignored -=item %s: Eval-group not allowed, use re 'eval' +(S) An internal routine called realloc() on something that had already +been freed. -(F) A regular expression contained the C<(?{ ... })> zero-width assertion, -but that construct is only allowed when the C<use re 'eval'> pragma is -in effect. See L<perlre/(?{ code })>. +=item Reference is already weak -=item %s: Eval-group not allowed at run time +(W misc) You have attempted to weaken a reference that is already weak. +Doing so has no effect. -(F) Perl tried to compile a regular expression containing the C<(?{ ... })> -zero-width assertion at run time, as it would when the pattern contains -interpolated values. Since that is a security risk, it is not allowed.
-If you insist, you may still do this by explicitly building the pattern -from an interpolated string at run time and using that in an eval(). -See L<perlre/(?{ code })>. +=item setpgrp can't take arguments -=item Explicit blessing to '' (assuming package main) +(F) Your system has the setpgrp() from BSD 4.2, which takes no arguments, +unlike POSIX setpgid(), which takes a process ID and process group ID. -(W) You are blessing a reference to a zero length string. This has -the effect of blessing the reference into the package main. This is -usually not what you want. Consider providing a default target -package, e.g. bless($ref, $p || 'MyPackage'); +=item Strange *+?{} on zero-length expression -=item Illegal hex digit ignored +(W regexp) You applied a regular expression quantifier in a place where it +makes no sense, such as on a zero-width assertion. +Try putting the quantifier inside the assertion instead. For example, +the way to match "abc" provided that it is followed by three +repetitions of "xyz" is C</abc(?=(?:xyz){3})/>, not C</abc(?=xyz){3}/>. -(W) You may have tried to use a character other than 0 - 9 or A - F in a -hexadecimal number. Interpretation of the hexadecimal number stopped -before the illegal character. +=item switching effective %s is not implemented -=item No such array field +(F) While under the C<use filetest> pragma, we cannot switch the +real and effective uids or gids. -(F) You tried to access an array as a hash, but the field name used is -not defined. The hash at index 0 should map all valid field names to -array indices for that to work. +=item This Perl can't reset CRTL environ elements (%s) -=item No such field "%s" in variable %s of type %s +=item This Perl can't set CRTL environ elements (%s=%s) -(F) You tried to access a field of a typed variable where the type -does not know about the field name. The field names are looked up in -the %FIELDS hash in the type package at compile time. 
The %FIELDS hash -is usually set up with the 'fields' pragma. +(W internal) Warnings peculiar to VMS. You tried to change or delete an element +of the CRTL's internal environ array, but your copy of Perl wasn't +built with a CRTL that contained the setenv() function. You'll need to +rebuild Perl with a CRTL that does, or redefine F<PERL_ENV_TABLES> (see +L<perlvms>) so that the environ array isn't the target of the change to +%ENV which produced the warning. -=item Out of memory during ridiculously large request +=item Too late to run %s block -(F) You can't allocate more than 2^31+"small amount" bytes. This error -is most likely to be caused by a typo in the Perl program. e.g., C<$arr[time]> -instead of C<$arr[$time]>. +(W void) A CHECK or INIT block is being defined during run time proper, +when the opportunity to run them has already passed. Perhaps you are +loading a file with C<require> or C<do> when you should be using +C<use> instead. Or perhaps you should put the C<require> or C<do> +inside a BEGIN block. -=item Range iterator outside integer range +=item Unknown open() mode '%s' -(F) One (or both) of the numeric arguments to the range operator ".." -are outside the range which can be represented by integers internally. -One possible workaround is to force Perl to use magical string -increment by prepending "0" to your numbers. +(F) The second argument of 3-argument open() is not among the list +of valid modes: C<< < >>, C<< > >>, C<<< >> >>>, C<< +< >>, +C<< +> >>, C<<< +>> >>>, C<-|>, C<|->. -=item Recursive inheritance detected while looking for method '%s' in package '%s' +=item Unknown process %x sent message to prime_env_iter: %s -(F) More than 100 levels of inheritance were encountered while invoking a -method. Probably indicates an unintended loop in your inheritance hierarchy. +(P) An error peculiar to VMS. Perl was reading values for %ENV before +iterating over it, and someone else stuck a message in the stream of +data Perl expected. 
Someone's very confused, or perhaps trying to +subvert Perl's population of %ENV for nefarious purposes. -=item Reference found where even-sized list expected +=item Unrecognized escape \\%c passed through -(W) You gave a single reference where Perl was expecting a list with -an even number of elements (for assignment to a hash). This -usually means that you used the anon hash constructor when you meant -to use parens. In any case, a hash requires key/value B<pairs>. +(W misc) You used a backslash-character combination which is not recognized +by Perl. The character was understood literally. - %hash = { one => 1, two => 2, }; # WRONG - %hash = [ qw/ an anon array / ]; # WRONG - %hash = ( one => 1, two => 2, ); # right - %hash = qw( one 1 two 2 ); # also fine +=item Unterminated attribute parameter in attribute list -=item Undefined value assigned to typeglob +(F) The lexer saw an opening (left) parenthesis character while parsing an +attribute list, but the matching closing (right) parenthesis +character was not found. You may need to add (or remove) a backslash +character to get your parentheses to balance. See L<attributes>. -(W) An undefined value was assigned to a typeglob, a la C<*foo = undef>. -This does nothing. It's possible that you really mean C<undef *foo>. +=item Unterminated attribute list -=item Use of reserved word "%s" is deprecated +(F) The lexer found something other than a simple identifier at the start +of an attribute, and it wasn't a semicolon or the start of a +block. Perhaps you terminated the parameter list of the previous attribute +too soon. See L<attributes>. -(D) The indicated bareword is a reserved word. Future versions of perl -may use it as a keyword, so you're better off either explicitly quoting -the word in a manner appropriate for its context of use, or using a -different name altogether. The warning can be suppressed for subroutine -names by either adding a C<&> prefix, or using a package qualifier, -e.g. 
C<&our()>, or C<Foo::our()>. +=item Unterminated attribute parameter in subroutine attribute list -=item perl: warning: Setting locale failed. +(F) The lexer saw an opening (left) parenthesis character while parsing a +subroutine attribute list, but the matching closing (right) parenthesis +character was not found. You may need to add (or remove) a backslash +character to get your parentheses to balance. -(S) The whole warning message will look something like: +=item Unterminated subroutine attribute list - perl: warning: Setting locale failed. - perl: warning: Please check that your locale settings: - LC_ALL = "En_US", - LANG = (unset) - are supported and installed on your system. - perl: warning: Falling back to the standard locale ("C"). +(F) The lexer found something other than a simple identifier at the start +of a subroutine attribute, and it wasn't a semicolon or the start of a +block. Perhaps you terminated the parameter list of the previous attribute +too soon. -Exactly what were the failed locale settings varies. In the above the -settings were that the LC_ALL was "En_US" and the LANG had no value. -This error means that Perl detected that you and/or your system -administrator have set up the so-called variable system but Perl could -not use those settings. This was not dead serious, fortunately: there -is a "default locale" called "C" that Perl can and will use, the -script will be run. Before you really fix the problem, however, you -will get the same error message each time you run Perl. How to really -fix the problem can be found in L<perllocale/"LOCALE PROBLEMS">. +=item Value of CLI symbol "%s" too long + +(W misc) A warning peculiar to VMS. Perl tried to read the value of an %ENV +element from a CLI symbol table, and found a resultant string longer +than 1024 characters. The return value has been truncated to 1024 +characters. 
+ +=item Version number must be a constant number + +(P) The attempt to translate a C<use Module n.n LIST> statement into +its equivalent C<BEGIN> block found an internal inconsistency with +the version number. =back +=head1 New tests -=head1 Obsolete Diagnostics +=over 4 + +=item lib/attrs + +Compatibility tests for C<sub : attrs> vs the older C<use attrs>. + +=item lib/env + +Tests for new environment scalar capability (e.g., C<use Env qw($BAR);>). + +=item lib/env-array + +Tests for new environment array capability (e.g., C<use Env qw(@PATH);>). + +=item lib/io_const + +IO constants (SEEK_*, _IO*). + +=item lib/io_dir + +Directory-related IO methods (new, read, close, rewind, tied delete). + +=item lib/io_multihomed + +INET sockets with multi-homed hosts. + +=item lib/io_poll + +IO poll(). + +=item lib/io_unix + +UNIX sockets. + +=item op/attrs + +Regression tests for C<my ($x,@y,%z) : attrs> and C<sub : attrs>. + +=item op/filetest + +File test operators. + +=item op/lex_assign + +Verify operations that access pad objects (lexicals and temporaries). + +=item op/exists_sub + +Verify C<exists &sub> operations. + +=back + +=head1 Incompatible Changes + +=head2 Perl Source Incompatibilities + +Beware that any new warnings that have been added or old ones +that have been enhanced are B<not> considered incompatible changes. + +Since all new warnings must be explicitly requested via the C<-w> +switch or the C<warnings> pragma, it is ultimately the programmer's +responsibility to ensure that warnings are enabled judiciously. + +=over 4 + +=item CHECK is a new keyword + +All subroutine definitions named CHECK are now special. See +L</"Support for CHECK blocks"> for more information. + +=item Treatment of list slices of undef has changed + +There is a potential incompatibility in the behavior of list slices +that are comprised entirely of undefined values. +See L</"Behavior of list slices is more consistent">.
+ +=head2 Format of $English::PERL_VERSION is different + +The English module now sets $PERL_VERSION to $^V (a string value) rather +than C<$]> (a numeric value). This is a potential incompatibility. +Send us a report via perlbug if you are affected by this. + +See L</"Improved Perl version numbering system"> for the reasons for +this change. + +=item Literals of the form C<1.2.3> parse differently + +Previously, numeric literals with more than one dot in them were +interpreted as a floating point number concatenated with one or more +numbers. Such "numbers" are now parsed as strings composed of the +specified ordinals. + +For example, C<print 97.98.99> used to output C<97.9899> in earlier +versions, but now prints C<abc>. + +See L</"Support for strings represented as a vector of ordinals">. + +=item Possibly changed pseudo-random number generator + +Perl programs that depend on reproducing a specific set of pseudo-random +numbers may now produce different output due to improvements made to the +rand() builtin. You can use C<sh Configure -Drandfunc=rand> to obtain +the old behavior. + +See L</"Better pseudo-random number generator">. + +=item Hashing function for hash keys has changed + +Even though Perl hashes are not order preserving, the apparently +random order encountered when iterating on the contents of a hash +is actually determined by the hashing algorithm used. Improvements +in the algorithm may yield a random order that is B<different> from +that of previous versions, especially when iterating on hashes. + +See L</"Better worst-case behavior of hashes"> for additional +information. + +=item C<undef> fails on read only values + +Using the C<undef> operator on a readonly value (such as $1) has +the same effect as assigning C<undef> to the readonly value--it +throws an exception. 
+ +=item Close-on-exec bit may be set on pipe and socket handles + +Pipe and socket handles are also now subject to the close-on-exec +behavior determined by the special variable $^F. + +See L</"More consistent close-on-exec behavior">. + +=item Writing C<"$$1"> to mean C<"${$}1"> is unsupported + +Perl 5.004 deprecated the interpretation of C<$$1> and +similar within interpolated strings to mean C<$$ . "1">, +but still allowed it. + +In Perl 5.6.0 and later, C<"$$1"> always means C<"${$1}">. + +=item delete(), values() and C<\(%h)> operate on aliases to values, not copies + +delete(), each(), values() and hashes in a list context return the actual +values in the hash, instead of copies (as they used to in earlier +versions). Typical idioms for using these constructs copy the +returned values, but this can make a significant difference when +creating references to the returned values. Keys in the hash are still +returned as copies when iterating on a hash. + +See also L</"delete(), each(), values() and hash iteration are faster">. + +=item vec(EXPR,OFFSET,BITS) enforces powers-of-two BITS + +vec() generates a run-time error if the BITS argument is not +a valid power-of-two integer. + +=item Text of some diagnostic output has changed + +Most references to internal Perl operations in diagnostics +have been changed to be more descriptive. This may be an +issue for programs that may incorrectly rely on the exact +text of diagnostics for proper functioning. + +=item C<%@> has been removed + +The undocumented special variable C<%@> that used to accumulate +"background" errors (such as those that happen in DESTROY()) +has been removed, because it could potentially result in memory +leaks. + +=item Parenthesized not() behaves like a list operator + +The C<not> operator now falls under the "if it looks like a function, +it behaves like a function" rule. + +As a result, the parenthesized form can be used with C<grep> and C<map>. 
+The following construct used to be a syntax error before, but it works +as expected now: + + grep not($_), @things; + +On the other hand, using C<not> with a literal list slice may not +work. The following previously allowed construct: + + print not (1,2,3)[0]; + +needs to be written with additional parentheses now: + + print not((1,2,3)[0]); + +The behavior remains unaffected when C<not> is not followed by parentheses. + +=item Semantics of bareword prototype C<(*)> have changed + +The semantics of the bareword prototype C<*> have changed. Perl 5.005 +always coerced simple scalar arguments to a typeglob, which wasn't useful +in situations where the subroutine must distinguish between a simple +scalar and a typeglob. The new behavior is to not coerce bareword +arguments to a typeglob. The value will always be visible as either +a simple scalar or as a reference to a typeglob. + +See L</"More functional bareword prototype (*)">. + +=head2 Semantics of bit operators may have changed on 64-bit platforms + +If your platform is either natively 64-bit or if Perl has been +configured to use 64-bit integers, i.e., $Config{ivsize} is 8, +there may be a potential incompatibility in the behavior of bitwise +numeric operators (& | ^ ~ << >>). These operators used to strictly +operate on the lower 32 bits of integers in previous versions, but now +operate over the entire native integral width. In particular, note +that unary C<~> will produce different results on platforms that have +different $Config{ivsize}. For portability, be sure to mask off +the excess bits in the result of unary C<~>, e.g., C<~$x & 0xffffffff>. + +See L</"Bit operators support full native integer width">. + +=head2 More builtins taint their results + +As described in L</"Improved security features">, there may be more +sources of taint in a Perl program. + +To avoid these new tainting behaviors, you can build Perl with the +Configure option C<-Accflags=-DINCOMPLETE_TAINTS>.
Beware that the +ensuing perl binary may be insecure. + +=back + +=head2 C Source Incompatibilities + +=over 4 + +=item C<PERL_POLLUTE> + +Release 5.005 grandfathered old global symbol names by providing preprocessor +macros for extension source compatibility. As of release 5.6.0, these +preprocessor definitions are not available by default. You need to explicitly +compile perl with C<-DPERL_POLLUTE> to get these definitions. For +extensions still using the old symbols, this option can be +specified via MakeMaker: + + perl Makefile.PL POLLUTE=1 + +=item C<PERL_IMPLICIT_CONTEXT> + +This new build option provides a set of macros for all API functions +such that an implicit interpreter/thread context argument is passed to +every API function. As a result of this, something like C<sv_setsv(foo,bar)> +amounts to a macro invocation that actually translates to something like +C<Perl_sv_setsv(my_perl,foo,bar)>. While this is generally expected +to not have any significant source compatibility issues, the difference +between a macro and a real function call will need to be considered. + +This means that there B<is> a source compatibility issue as a result of +this if your extensions attempt to use pointers to any of the Perl API +functions. + +Note that the above issue is not relevant to the default build of +Perl, whose interfaces continue to match those of prior versions +(but subject to the other options described here). + +See L<perlguts/"The Perl API"> for detailed information on the +ramifications of building Perl with this option. + + NOTE: PERL_IMPLICIT_CONTEXT is automatically enabled whenever Perl is built + with one of -Dusethreads, -Dusemultiplicity, or both. It is not + intended to be enabled by users at this time. + +=item C<PERL_POLLUTE_MALLOC> + +Enabling Perl's malloc in release 5.005 and earlier caused the namespace of +the system's malloc family of functions to be usurped by the Perl versions, +since by default they used the same names. 
Besides causing problems on +platforms that do not allow these functions to be cleanly replaced, this +also meant that the system versions could not be called in programs that +used Perl's malloc. Previous versions of Perl have allowed this behaviour +to be suppressed with the HIDEMYMALLOC and EMBEDMYMALLOC preprocessor +definitions. + +As of release 5.6.0, Perl's malloc family of functions have default names +distinct from the system versions. You need to explicitly compile perl with +C<-DPERL_POLLUTE_MALLOC> to get the older behaviour. HIDEMYMALLOC +and EMBEDMYMALLOC have no effect, since the behaviour they enabled is now +the default. + +Note that these functions do B<not> constitute Perl's memory allocation API. +See L<perlguts/"Memory Allocation"> for further information about that. + +=back + +=head2 Compatible C Source API Changes =over -=item Can't mktemp() +=item C<PATCHLEVEL> is now C<PERL_VERSION> + +The cpp macros C<PERL_REVISION>, C<PERL_VERSION>, and C<PERL_SUBVERSION> +are now available by default from perl.h, and reflect the base revision, +patchlevel, and subversion respectively. C<PERL_REVISION> had no +prior equivalent, while C<PERL_VERSION> and C<PERL_SUBVERSION> were +previously available as C<PATCHLEVEL> and C<SUBVERSION>. + +The new names cause less pollution of the B<cpp> namespace and reflect what +the numbers have come to stand for in common practice. For compatibility, +the old names are still supported when F<patchlevel.h> is explicitly +included (as required before), so there is no source incompatibility +from the change. + +=back + +=head2 Binary Incompatibilities + +In general, the default build of this release is expected to be binary +compatible for extensions built with the 5.005 release or its maintenance +versions. However, specific platforms may have broken binary compatibility +due to changes in the defaults used in hints files. 
Therefore, please be +sure to always check the platform-specific README files for any notes to +the contrary. + +The usethreads or usemultiplicity builds are B<not> binary compatible +with the corresponding builds in 5.005. + +On platforms that require an explicit list of exports (AIX, OS/2 and Windows, +among others), purely internal symbols such as parser functions and the +run time opcodes are not exported by default. Perl 5.005 used to export +all functions irrespective of whether they were considered part of the +public API or not. + +For the full list of public API functions, see L<perlapi>. + +=head1 Known Problems + +=head2 Thread test failures + +The subtests 19 and 20 of lib/thr5005.t test are known to fail due to +fundamental problems in the 5.005 threading implementation. These are +not new failures--Perl 5.005_0x has the same bugs, but didn't have these +tests. + +=head2 EBCDIC platforms not supported + +In earlier releases of Perl, EBCDIC environments like OS390 (also +known as Open Edition MVS) and VM-ESA were supported. Due to changes +required by the UTF-8 (Unicode) support, the EBCDIC platforms are not +supported in Perl 5.6.0. + +=head2 In 64-bit HP-UX the lib/io_multihomed test may hang + +The lib/io_multihomed test may hang in HP-UX if Perl has been +configured to be 64-bit. Because other 64-bit platforms do not +hang in this test, HP-UX is suspect. All other tests pass +in 64-bit HP-UX. The test attempts to create and connect to +"multihomed" sockets (sockets which have multiple IP addresses). + +=head2 NEXTSTEP 3.3 POSIX test failure + +In NEXTSTEP 3.3p2 the implementation of the strftime(3) in the +operating system libraries is buggy: the %j format numbers the days of +a month starting from zero, which, while being logical to programmers, +will cause the subtests 19 to 27 of the lib/posix test to fail.
+ +=head2 Tru64 (aka Digital UNIX, aka DEC OSF/1) lib/sdbm test failure with gcc -(F) The mktemp() routine failed for some reason while trying to process -a B<-e> switch. Maybe your /tmp partition is full, or clobbered. +If compiled with gcc 2.95 the lib/sdbm test will fail (dump core). +The cure is to use the vendor cc, it comes with the operating system +and produces good code. -Removed because B<-e> doesn't use temporary files any more. +=head2 UNICOS/mk CC failures during Configure run -=item Can't write to temp file for B<-e>: %s +In UNICOS/mk the following errors may appear during the Configure run: -(F) The write routine failed for some reason while trying to process -a B<-e> switch. Maybe your /tmp partition is full, or clobbered. + Guessing which symbols your C compiler and preprocessor define... + CC-20 cc: ERROR File = try.c, Line = 3 + ... + bad switch yylook 79bad switch yylook 79bad switch yylook 79bad switch yylook 79#ifdef A29K + ... + 4 errors detected in the compilation of "try.c". -Removed because B<-e> doesn't use temporary files any more. +The culprit is the broken awk of UNICOS/mk. The effect is fortunately +rather mild: Perl itself is not adversely affected by the error, only +the h2ph utility coming with Perl, and that is rather rarely needed +these days. -=item Cannot open temporary file +=head2 Arrow operator and arrays -(F) The create routine failed for some reason while trying to process -a B<-e> switch. Maybe your /tmp partition is full, or clobbered. +When the left argument to the arrow operator C<< -> >> is an array, or +the C<scalar> operator operating on an array, the result of the +operation must be considered erroneous. For example: -Removed because B<-e> doesn't use temporary files any more. + @x->[2] + scalar(@x)->[2] + +These expressions will get run-time errors in some future release of +Perl. + +=head2 Windows 2000 + +Windows 2000 is known to fail test 22 in lib/open3.t (cause unknown at +this time). 
That test passes under Windows NT. + +=head2 Experimental features + +As discussed above, many features are still experimental. Interfaces and +implementation of these features are subject to change, and in extreme cases, +even subject to removal in some future release of Perl. These features +include the following: + +=over 4 + +=item Threads + +=item Unicode + +=item 64-bit support + +=item Lvalue subroutines + +=item Weak references + +=item The pseudo-hash data type + +=item The Compiler suite + +=item Internal implementation of file globbing + +=item The DB module + +=item The regular expression constructs C<(?{ code })> and C<(??{ code })> + +=back + +=head1 Obsolete Diagnostics + +=over 4 + +=item Character class syntax [: :] is reserved for future extensions + +(W) Within regular expression character classes ([]) the syntax beginning +with "[:" and ending with ":]" is reserved for future extensions. +If you need to represent those character sequences inside a regular +expression character class, just quote the square brackets with the +backslash: "\[:" and ":\]". + +=item Ill-formed logical name |%s| in prime_env_iter + +(W) A warning peculiar to VMS. A logical name was encountered when preparing +to iterate over %ENV which violates the syntactic rules governing logical +names. Because it cannot be translated normally, it is skipped, and will not +appear in %ENV. This may be a benign occurrence, as some software packages +might directly modify logical name tables and introduce nonstandard names, +or it may indicate that a logical name table has been corrupted. + +=item Probable precedence problem on %s + +(W) The compiler found a bareword where it expected a conditional, +which often indicates that an || or && was parsed as part of the +last argument of the previous construct, for example: + + open FOO || die; =item regexp too big @@ -987,26 +2920,31 @@ the regular expression compiles to longer than 32767, it'll blow up. 
Usually when you want a regular expression this big, there is a better way to do it with multiple statements. See L<perlre>. -=back +=item Use of "$$<digit>" to mean "${$}<digit>" is deprecated -=head1 Configuration Changes +(D) Perl versions before 5.004 misinterpreted any type marker followed +by "$" and a digit. For example, "$$0" was incorrectly taken to mean +"${$}0" instead of "${$0}". This bug is (mostly) fixed in Perl 5.004. -You can use "Configure -Uinstallusrbinperl" which causes installperl -to skip installing perl also as /usr/bin/perl. This is useful if you -prefer not to modify /usr/bin for some reason or another but harmful -because many scripts assume to find Perl in /usr/bin/perl. +However, the developers of Perl 5.004 could not fix this bug completely, +because at least two widely-used modules depend on the old meaning of +"$$0" in a string. So Perl 5.004 still interprets "$$<digit>" in the +old (broken) way inside strings; but it generates this message as a +warning. And in Perl 5.005, this special treatment will cease. + +=back -=head1 BUGS +=head1 Reporting Bugs -If you find what you think is a bug, you might check the headers of -recently posted articles in the comp.lang.perl.misc newsgroup. +If you find what you think is a bug, you might check the +articles recently posted to the comp.lang.perl.misc newsgroup. There may also be information at http://www.perl.com/perl/, the Perl Home Page. If you believe you have an unreported bug, please run the B<perlbug> -program included with your release. Make sure you trim your bug down +program included with your release. Be sure to trim your bug down to a tiny but sufficient test case. Your bug report, along with the -output of C<perl -V>, will be sent off to <F<perlbug@perl.com>> to be +output of C<perl -V>, will be sent off to perlbug@perl.com to be analysed by the Perl porting team. =head1 SEE ALSO @@ -1021,8 +2959,8 @@ The F<Artistic> and F<Copying> files for copyright information. 
=head1 HISTORY -Written by Gurusamy Sarathy <F<gsar@umich.edu>>, with many contributions -from The Perl Porters. +Written by Gurusamy Sarathy <F<gsar@activestate.com>>, with many +contributions from The Perl Porters. Send omissions or corrections to <F<perlbug@perl.com>>. diff --git a/contrib/perl5/pod/perldiag.pod b/contrib/perl5/pod/perldiag.pod index fe31991..9ed7552 100644 --- a/contrib/perl5/pod/perldiag.pod +++ b/contrib/perl5/pod/perldiag.pod @@ -9,79 +9,167 @@ desperation): (W) A warning (optional). (D) A deprecation (optional). - (S) A severe warning (mandatory). + (S) A severe warning (default). (F) A fatal error (trappable). (P) An internal error you should never see (trappable). (X) A very fatal error (nontrappable). (A) An alien error message (not generated by Perl). -Optional warnings are enabled by using the B<-w> switch. Warnings may -be captured by setting C<$SIG{__WARN__}> to a reference to a routine that -will be called on each warning instead of printing it. See L<perlvar>. +The majority of messages from the first three classifications above (W, +D & S) can be controlled using the C<warnings> pragma. + +If a message can be controlled by the C<warnings> pragma, its warning +category is included with the classification letter in the description +below. + +Optional warnings are enabled by using the C<warnings> pragma or the B<-w> +and B<-W> switches. Warnings may be captured by setting C<$SIG{__WARN__}> +to a reference to a routine that will be called on each warning instead +of printing it. See L<perlvar>. + +Default warnings are always enabled unless they are explicitly disabled +with the C<warnings> pragma or the B<-X> switch. + Trappable errors may be trapped using the eval operator. See -L<perlfunc/eval>. +L<perlfunc/eval>. In almost all cases, warnings may be selectively +disabled or promoted to fatal errors using the C<warnings> pragma. +See L<warnings>. Some of these messages are generic. 
Spots that vary are denoted with a %s, just as in a printf format. Note that some messages start with a %s! -The symbols C<"%(-?@> sort before the letters, while C<[> and C<\> sort after. +Since the messages are listed in alphabetical order, the symbols +C<"%(-?@> sort before the letters, while C<[> and C<\> sort after. =over 4 -=item "my" variable %s can't be in a package +=item "%s" variable %s masks earlier declaration in same %s -(F) Lexically scoped variables aren't in a package, so it doesn't make sense -to try to declare one with a package qualifier on the front. Use local() -if you want to localize a package variable. - -=item "my" variable %s masks earlier declaration in same %s - -(W) A lexical variable has been redeclared in the current scope or statement, +(W misc) A "my" or "our" variable has been redeclared in the current scope or statement, effectively eliminating all access to the previous instance. This is almost always a typographical error. Note that the earlier variable will still exist until the end of the scope or until all closure referents to it are destroyed. +=item "my sub" not yet implemented + +(F) Lexically scoped subroutines are not yet implemented. Don't try that +yet. + +=item "my" variable %s can't be in a package + +(F) Lexically scoped variables aren't in a package, so it doesn't make sense +to try to declare one with a package qualifier on the front. Use local() +if you want to localize a package variable. + =item "no" not allowed in expression (F) The "no" keyword is recognized and executed at compile time, and returns no useful value. See L<perlmod>. +=item "our" variable %s redeclared + +(W misc) You seem to have already declared the same global once before in the +current lexical scope. + =item "use" not allowed in expression (F) The "use" keyword is recognized and executed at compile time, and returns no useful value. See L<perlmod>. +=item '!' allowed only after types %s + +(F) The '!' 
is allowed in pack() and unpack() only after certain types. +See L<perlfunc/pack>. + +=item / cannot take a count + +(F) You had an unpack template indicating a counted-length string, +but you have also specified an explicit size for the string. +See L<perlfunc/pack>. + +=item / must be followed by a, A or Z + +(F) You had an unpack template indicating a counted-length string, +which must be followed by one of the letters a, A or Z +to indicate what sort of string is to be unpacked. +See L<perlfunc/pack>. + +=item / must be followed by a*, A* or Z* + +(F) You had a pack template indicating a counted-length string. +Currently the only things that can have their length counted are a*, A* or Z*. +See L<perlfunc/pack>. + +=item / must follow a numeric type + +(F) You had an unpack template that contained a '/', +but this did not follow some numeric unpack specification. +See L<perlfunc/pack>. + +=item % may only be used in unpack (F) You can't pack a string by supplying a checksum, because the checksumming process loses information, and you can't go the other way. See L<perlfunc/unpack>. +=item /%s/: Unrecognized escape \\%c passed through + +(W regexp) You used a backslash-character combination which is not recognized +by Perl. This combination appears in an interpolated variable or a +C<'>-delimited regular expression. The character was understood literally. + +=item /%s/: Unrecognized escape \\%c in character class passed through + +(W regexp) You used a backslash-character combination which is not recognized +by Perl inside character classes. The character was understood literally. + +=item /%s/ should probably be written as "%s" + +(W syntax) You have used a pattern where Perl expected to find a string, +as in the first argument to C<join>. Perl will treat the true +or false result of matching the pattern against $_ as the string, +which is probably not what you had in mind. + =item %s (...)
interpreted as function -(W) You've run afoul of the rule that says that any list operator followed +(W syntax) You've run afoul of the rule that says that any list operator followed by parentheses turns into a function, with all the list operators arguments found inside the parentheses. See L<perlop/Terms and List Operators (Leftward)>. -=item %s argument is not a HASH element +=item %s() called too early to check prototype + +(W prototype) You've called a function that has a prototype before the parser saw a +definition or declaration for it, and Perl could not check that the call +conforms to the prototype. You need to either add an early prototype +declaration for the subroutine in question, or move the subroutine +definition ahead of the call to get proper prototype checking. Alternatively, +if you are certain that you're calling the function correctly, you may put +an ampersand before the name to avoid the warning. See L<perlsub>. + +=item %s argument is not a HASH or ARRAY element -(F) The argument to exists() must be a hash element, such as +(F) The argument to exists() must be a hash or array element, such as: $foo{$bar} - $ref->[12]->{"susie"} + $ref->{"susie"}[12] -=item %s argument is not a HASH element or slice +=item %s argument is not a HASH or ARRAY element or slice -(F) The argument to delete() must be either a hash element, such as +(F) The argument to delete() must be either a hash or array element, such as: $foo{$bar} - $ref->[12]->{"susie"} + $ref->{"susie"}[12] -or a hash slice, such as +or a hash or array slice, such as: - @foo{$bar, $baz, $xyzzy} + @foo[$bar, $baz, $xyzzy] @{$ref->[12]}{"susie", "queue"} +=item %s argument is not a subroutine name + +(F) The argument to exists() for C<exists &sub> must be a subroutine +name, and not a subroutine call. C<exists &sub()> will generate this error. 
+ =item %s did not return a true value (F) A required (or used) file must return a true value to indicate that @@ -107,14 +195,21 @@ Further error messages would likely be uninformative. =item %s matches null string many times -(W) The pattern you've specified would be an infinite loop if the +(W regexp) The pattern you've specified would be an infinite loop if the regular expression engine didn't specifically check for that. See L<perlre>. =item %s never introduced -(S) The symbol in question was declared but somehow went out of scope +(S internal) The symbol in question was declared but somehow went out of scope before it could possibly have been used. +=item %s package attribute may clash with future reserved word: %s + +(W reserved) A lowercase attribute name was used that had a package-specific handler. +That name might have a meaning to Perl itself some day, even though it +doesn't yet. Perhaps you should use a mixed-case attribute name, instead. +See L<attributes>. + =item %s syntax OK (F) The final summary message when a C<perl -c> succeeds. @@ -143,9 +238,9 @@ Perl yourself. instead of Perl. Check the #! line, or manually feed your script into Perl yourself. -=item (in cleanup) %s +=item (in cleanup) %s -(W) This prefix usually indicates that a DESTROY() method raised +(W misc) This prefix usually indicates that a DESTROY() method raised the indicated exception. Since destructors are usually called by the system at arbitrary points during execution, and often a vast number of times, the warning is issued only once for any number @@ -155,7 +250,7 @@ repeated. Failure of user callbacks dispatched using the C<G_KEEPERR> flag could also result in this warning. See L<perlcall/G_KEEPERR>. -=item (Missing semicolon on previous line?) +=item (Missing semicolon on previous line?) (S) This is an educated guess made in conjunction with the message "%s found where operator expected". 
Don't automatically put a semicolon on @@ -191,9 +286,14 @@ if you meant it literally. See L<perlre>. (F) You had a pack template that specified an absolute position outside the string being unpacked. See L<perlfunc/pack>. -=item accept() on closed fd +=item <> should be quotes -(W) You tried to do an accept on a closed socket. Did you forget to check +(F) You wrote C<< require <file> >> when you should have written +C<require 'file'>. + +=item accept() on closed socket %s + +(W closed) You tried to do an accept on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/accept>. =item Allocation too large: %lx @@ -202,7 +302,7 @@ the return value of your socket() call? See L<perlfunc/accept>. =item Applying %s to %s will act on scalar(%s) -(W) The pattern match (//), substitution (s///), and transliteration (tr///) +(W misc) The pattern match (//), substitution (s///), and transliteration (tr///) operators work on scalar values. If you apply one of them to an array or a hash, it will convert the array or hash to a scalar value -- the length of an array, or the population info of a hash -- and then work on @@ -215,13 +315,13 @@ L<perlfunc/grep> and L<perlfunc/map> for alternatives. =item Ambiguous use of %s resolved as %s -(W)(S) You said something that may not be interpreted the way +(W ambiguous)(S) You said something that may not be interpreted the way you thought. Normally it's pretty easy to disambiguate it by supplying a missing quote, operator, parenthesis pair or declaration. =item Ambiguous call resolved as CORE::%s(), qualify as such or use & -(W) A subroutine you have declared has the same name as a Perl keyword, +(W ambiguous) A subroutine you have declared has the same name as a Perl keyword, and you have used the name without qualification for calling one or the other. Perl decided to call the builtin because the subroutine is not imported. @@ -233,7 +333,8 @@ imported with the C<use subs> pragma). 
To silently interpret it as the Perl operator, use the C<CORE::> prefix on the operator (e.g. C<CORE::log($x)>) or by declaring the subroutine -to be an object method (see L<attrs>). +to be an object method (see L<perlsub/"Subroutine Attributes"> +or L<attributes>). =item Args must match #! line @@ -244,13 +345,13 @@ for example, turn C<-w -U> into C<-wU>. =item Argument "%s" isn't numeric%s -(W) The indicated string was fed as an argument to an operator that +(W numeric) The indicated string was fed as an argument to an operator that expected a numeric value instead. If you're fortunate the message will identify which operator was so unfortunate. =item Array @%s missing the @ in argument %d of %s() -(D) Really old Perl let you omit the @ on array names in some spots. This +(D deprecated) Really old Perl let you omit the @ on array names in some spots. This is now heavily deprecated. =item assertion botched: %s @@ -269,20 +370,20 @@ know which context to supply to the right side. =item Attempt to free non-arena SV: 0x%lx -(P) All SV objects are supposed to be allocated from arenas that will +(P internal) All SV objects are supposed to be allocated from arenas that will be garbage collected on exit. An SV was discovered to be outside any of those arenas. =item Attempt to free nonexistent shared string -(P) Perl maintains a reference counted internal table of strings to +(P internal) Perl maintains a reference counted internal table of strings to optimize the storage and access of hash keys and other strings. This indicates someone tried to decrement the reference count of a string that can no longer be found in the table. =item Attempt to free temp prematurely -(W) Mortalized values are supposed to be freed by the free_tmps() +(W debugging) Mortalized values are supposed to be freed by the free_tmps() routine. 
This indicates that something else is freeing the SV before the free_tmps() routine gets a chance, which means that the free_tmps() routine will be freeing an unreferenced scalar when it does try to free @@ -290,20 +391,26 @@ it. =item Attempt to free unreferenced glob pointers -(P) The reference counts got screwed up on symbol aliases. +(P internal) The reference counts got screwed up on symbol aliases. =item Attempt to free unreferenced scalar -(W) Perl went to decrement the reference count of a scalar to see if it +(W internal) Perl went to decrement the reference count of a scalar to see if it would go to 0, and discovered that it had already gone to 0 earlier, and should have been freed, and in fact, probably was freed. This could indicate that SvREFCNT_dec() was called too many times, or that SvREFCNT_inc() was called too few times, or that the SV was mortalized when it shouldn't have been, or that memory has been corrupted. +=item Attempt to join self + +(F) You tried to join a thread from within itself, which is an +impossible task. You may be joining the wrong thread, or you may +need to move the join() to some other thread. + =item Attempt to pack pointer to temporary value -(W) You tried to pass a temporary value (like the result of a +(W pack) You tried to pass a temporary value (like the result of a function, or a computed expression) to the "p" pack() template. This means the result contains a pointer to a location that could become invalid anytime, even before the end of the current statement. Use @@ -312,7 +419,7 @@ avoid this warning. =item Attempt to use reference as lvalue in substr -(W) You supplied a reference as the first argument to substr() used +(W substr) You supplied a reference as the first argument to substr() used as an lvalue, which is pretty strange. Perhaps you forgot to dereference it first. See L<perlfunc/substr>. @@ -323,6 +430,12 @@ shmctl(). 
In C parlance, the correct sizes are, respectively, S<sizeof(struct msqid_ds *)>, S<sizeof(struct semid_ds *)>, and S<sizeof(struct shmid_ds *)>. +=item Bad evalled substitution pattern + +(F) You've used the /e switch to evaluate the replacement for a +substitution, but perl found a syntax error in the code to evaluate, +most likely an unexpected right brace '}'. + =item Bad filehandle: %s (F) A symbol was passed to something wanting a filehandle, but the symbol @@ -331,7 +444,7 @@ did it in another package. =item Bad free() ignored -(S) An internal routine called free() on something that had never been +(S malloc) An internal routine called free() on something that had never been malloc()ed in the first place. Mandatory, but can be disabled by setting environment variable C<PERL_BADFREE> to 1. @@ -364,6 +477,12 @@ is not the same as $var = 'myvar'; $sym = "mypack::$var"; +=item Bad realloc() ignored + +(S malloc) An internal routine called realloc() on something that had never been +malloc()ed in the first place. Mandatory, but can be disabled by +setting environment variable C<PERL_BADFREE> to 1. + =item Bad symbol for array (P) An internal request asked to add an array entry to something that @@ -393,10 +512,26 @@ Perhaps you need to predeclare a subroutine? =item Bareword "%s" refers to nonexistent package -(W) You used a qualified bareword of the form C<Foo::>, but +(W bareword) You used a qualified bareword of the form C<Foo::>, but the compiler saw no other uses of that namespace before that point. Perhaps you need to predeclare a package? 
+=item Bareword found in conditional + +(W bareword) The compiler found a bareword where it expected a conditional, +which often indicates that an || or && was parsed as part of the +last argument of the previous construct, for example: + + open FOO || die; + +It may also indicate a misspelled constant that has been interpreted +as a bareword: + + use constant TYPO => 1; + if (TYOP) { print "foo" } + +The C<strict> pragma is useful in avoiding such errors. + =item BEGIN failed--compilation aborted (F) An untrapped exception was raised while executing a BEGIN subroutine. @@ -410,21 +545,37 @@ already occurred. Since the intended environment for the C<BEGIN {}> could not be guaranteed (due to the errors), and since subsequent code likely depends on its correct operation, Perl just gave up. -=item bind() on closed fd +=item Binary number > 0b11111111111111111111111111111111 non-portable + +(W portable) The binary number you specified is larger than 2**32-1 +(4294967295) and therefore non-portable between systems. See +L<perlport> for more on portability concerns. -(W) You tried to do a bind on a closed socket. Did you forget to check +=item bind() on closed socket %s + +(W closed) You tried to do a bind on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/bind>. +=item Bit vector size > 32 non-portable + +(W portable) Using bit vector sizes larger than 32 is non-portable. + =item Bizarre copy of %s in %s (P) Perl detected an attempt to copy an internal value that is not copiable. +=item Buffer overflow in prime_env_iter: %s + +(W internal) A warning peculiar to VMS. While Perl was preparing to iterate over +%ENV, it encountered a logical name or symbol definition which was too long, +so it was truncated to the string shown. + =item Callback called exit -(F) A subroutine invoked from an external package via perl_call_sv() +(F) A subroutine invoked from an external package via call_sv() exited by calling exit. 
-=item Can't "goto" outside a block +=item Can't "goto" out of a pseudo block (F) A "goto" statement was executed to jump out of what might look like a block, except that it isn't a proper block. This usually @@ -436,30 +587,40 @@ is a no-no. See L<perlfunc/goto>. (F) A "goto" statement was executed to jump into the middle of a foreach loop. You can't get there from here. See L<perlfunc/goto>. -=item Can't "last" outside a block +=item Can't "last" outside a loop block (F) A "last" statement was executed to break out of the current block, except that there's this itty bitty problem called there isn't a current block. Note that an "if" or "else" block doesn't count as a -"loopish" block, as doesn't a block given to sort(). You can usually double -the curlies to get the same effect though, because the inner curlies -will be considered a block that loops once. See L<perlfunc/last>. +"loopish" block, as doesn't a block given to sort(), map() or grep(). +You can usually double the curlies to get the same effect though, +because the inner curlies will be considered a block that loops once. +See L<perlfunc/last>. -=item Can't "next" outside a block +=item Can't "next" outside a loop block (F) A "next" statement was executed to reiterate the current block, but there isn't a current block. Note that an "if" or "else" block doesn't -count as a "loopish" block, as doesn't a block given to sort(). You can -usually double the curlies to get the same effect though, because the inner -curlies will be considered a block that loops once. See L<perlfunc/next>. +count as a "loopish" block, as doesn't a block given to sort(), map() +or grep(). You can usually double the curlies to get the same effect +though, because the inner curlies will be considered a block that +loops once. See L<perlfunc/next>. -=item Can't "redo" outside a block +=item Can't read CRTL environ + +(S) A warning peculiar to VMS. 
Perl tried to read an element of %ENV +from the CRTL's internal environment array and discovered the array was +missing. You need to figure out where your CRTL misplaced its environ +or define F<PERL_ENV_TABLES> (see L<perlvms>) so that environ is not searched. + +=item Can't "redo" outside a loop block (F) A "redo" statement was executed to restart the current block, but there isn't a current block. Note that an "if" or "else" block doesn't -count as a "loopish" block, as doesn't a block given to sort(). You can -usually double the curlies to get the same effect though, because the inner -curlies will be considered a block that loops once. See L<perlfunc/redo>. +count as a "loopish" block, as doesn't a block given to sort(), map() +or grep(). You can usually double the curlies to get the same effect +though, because the inner curlies will be considered a block that +loops once. See L<perlfunc/redo>. =item Can't bless non-reference value @@ -468,7 +629,7 @@ encapsulation of objects. See L<perlobj>. =item Can't break at that line -(S) A warning intended to only be printed while running within the debugger, indicating +(S internal) A warning intended to only be printed while running within the debugger, indicating the line number specified wasn't the location of a statement that could be stopped at. @@ -551,14 +712,20 @@ only with arrays that have a hash reference at index 0. (P) An error peculiar to VMS. The process is suffering from exhausted quotas or other plumbing problems. -=item Can't declare %s in my +=item Can't declare class for non-scalar %s in "%s" + +(S) Currently, only scalar variables can be declared with a specific class +qualifier in a "my" or "our" declaration. The semantics may be extended +for other types of variables in the future. + +=item Can't declare %s in "%s" -(F) Only scalar, array, and hash variables may be declared as lexical variables. -They must have ordinary identifiers as names.
+(F) Only scalar, array, and hash variables may be declared as "my" or +"our" variables. They must have ordinary identifiers as names. =item Can't do inplace edit on %s: %s -(S) The creation of the new file failed for the indicated reason. +(S inplace) The creation of the new file failed for the indicated reason. =item Can't do inplace edit without backup @@ -566,13 +733,15 @@ They must have ordinary identifiers as names. from a deleted (but still opened) file. You have to say C<-i.bak>, or some such. -=item Can't do inplace edit: %s E<gt> 14 characters +=item Can't do inplace edit: %s would not be unique -(S) There isn't enough room in the filename to make a backup name for the file. +(S inplace) Your filesystem does not support filenames longer than 14 +characters and Perl was unable to create a unique filename during +inplace editing with the B<-i> switch. The file was ignored. =item Can't do inplace edit: %s is not a regular file -(S) You tried to use the B<-i> switch on a special file, such as a file in +(S inplace) You tried to use the B<-i> switch on a special file, such as a file in /dev, or a FIFO. The file was ignored. =item Can't do setegid! @@ -598,7 +767,7 @@ your sysadmin why he and/or she removed it. (F) This machine doesn't have either waitpid() or wait4(), so only waitpid() without flags is emulated. -=item Can't do {n,m} with n E<gt> m +=item Can't do {n,m} with n > m (F) Minima must be less than or equal to maxima. If you really want your regexp to match something 0 times, just put {0}. See L<perlre>. @@ -610,7 +779,7 @@ For example, it'd be kind of silly to put a B<-x> on the #! line. =item Can't exec "%s": %s -(W) An system(), exec(), or piped open call could not execute the named +(W exec) A system(), exec(), or piped open call could not execute the named program for the indicated reason.
Typical reasons include: the permissions were wrong on the file, the file wasn't found in C<$ENV{PATH}>, the executable in question was compiled for another architecture, or the @@ -699,6 +868,15 @@ L<perlfunc/goto>. (F) The "goto subroutine" call can't be used to jump out of an eval "string". (You can use it to jump out of an eval {BLOCK}, but you probably don't want to.) +=item Can't ignore signal CHLD, forcing to default + +(W signal) Perl has detected that it is being run with the SIGCHLD signal +(sometimes known as SIGCLD) disabled. Since disabling this signal +will interfere with proper determination of exit status of child +processes, Perl has reset the signal to its default value. +This situation typically indicates that the parent program under +which Perl may be running (e.g. cron) is being very careless. + =item Can't localize through a reference (F) You said something like C<local $$ref>, which Perl can't currently @@ -715,10 +893,10 @@ package name. =item Can't localize pseudo-hash element -(F) You said something like C<local $ar-E<gt>{'key'}>, where $ar is +(F) You said something like C<< local $ar->{'key'} >>, where $ar is a reference to a pseudo-hash. That hasn't been implemented yet, but you can get a similar effect by localizing the corresponding array -element directly -- C<local $ar-E<gt>[$ar-E<gt>[0]{'key'}]>. +element directly -- C<< local $ar->[$ar->[0]{'key'}] >>. =item Can't locate auto/%s.al in @INC @@ -727,13 +905,15 @@ but there is no function to autoload. Most probable causes are a misprint in a function/method name or a failure to C<AutoSplit> the file, say, by doing C<make install>. -=item Can't locate %s in @INC +=item Can't locate %s -(F) You said to do (or require, or use) a file that couldn't be found -in any of the libraries mentioned in @INC. Perhaps you need to set the -PERL5LIB or PERL5OPT environment variable to say where the extra library -is, or maybe the script needs to add the library name to @INC. 
Or maybe -you just misspelled the name of the file. See L<perlfunc/require>. +(F) You said to C<do> (or C<require>, or C<use>) a file that couldn't be +found. Perl looks for the file in all the locations mentioned in @INC, +unless the file name included the full path to the file. Perhaps you need +to set the PERL5LIB or PERL5OPT environment variable to say where the extra +library is, or maybe the script needs to add the library name to @INC. Or +maybe you just misspelled the name of the file. See L<perlfunc/require> +and L<lib>. =item Can't locate object method "%s" via package "%s" @@ -743,7 +923,7 @@ method, nor does any of its base classes. See L<perlobj>. =item Can't locate package %s for @%s::ISA -(W) The @ISA array contained the name of another package that doesn't seem +(W syntax) The @ISA array contained the name of another package that doesn't seem to exist. =item Can't make list assignment to \%ENV on this system @@ -755,6 +935,11 @@ to exist. (F) You aren't allowed to assign to the item indicated, or otherwise try to change it, such as with an auto-increment. +=item Can't modify non-lvalue subroutine call + +(F) Subroutines meant to be used in lvalue context should be declared as +such, see L<perlsub/"Lvalue subroutines">. + =item Can't modify nonexistent substring (P) The internal routine that does assignment to a substr() was handed @@ -767,7 +952,7 @@ buffer. =item Can't open %s: %s -(S) The implicit opening of a file through use of the C<E<lt>E<gt>> +(S inplace) The implicit opening of a file through use of the C<< <> >> filehandle, either implicitly under the C<-n> or C<-p> command-line switches, or explicitly, failed for the indicated reason. Usually this is because you don't have read permission for a file which you named @@ -775,26 +960,26 @@ on the command line. =item Can't open bidirectional pipe -(W) You tried to say C<open(CMD, "|cmd|")>, which is not supported. 
You can +(W pipe) You tried to say C<open(CMD, "|cmd|")>, which is not supported. You can try any of several modules in the Perl library to do this, such as -IPC::Open2. Alternately, direct the pipe's output to a file using "E<gt>", +IPC::Open2. Alternately, direct the pipe's output to a file using ">", and then read it in under a different file handle. =item Can't open error file %s as stderr (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after '2E<gt>' or '2E<gt>E<gt>' on the +couldn't open the file specified after '2>' or '2>>' on the command line for writing. =item Can't open input file %s as stdin (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after 'E<lt>' on the command line for reading. +couldn't open the file specified after '<' on the command line for reading. =item Can't open output file %s as stdout (F) An error peculiar to VMS. Perl does its own command line redirection, and -couldn't open the file specified after 'E<gt>' or 'E<gt>E<gt>' on the command +couldn't open the file specified after '>' or '>>' on the command line for writing. =item Can't open output pipe (name: %s) @@ -813,10 +998,16 @@ pointers into them. You tried to redefine one such sort subroutine when it was currently active, which is not allowed. If you really want to do this, you should write C<sort { &func } @x> instead of C<sort func @x>. +=item Can't remove %s: %s, skipping file + +(S inplace) You requested an inplace edit without creating a backup file. Perl +was unable to remove the original file to replace it with the modified +file. The file was left unmodified. + =item Can't rename %s to %s: %s, skipping file -(S) The rename done by the B<-i> switch failed for some reason, probably because -you don't have write permission to the directory. 
+(S inplace) The rename done by the B<-i> switch failed for some reason, +probably because you don't have write permission to the directory. =item Can't reopen input pipe (name: %s) in binary mode @@ -833,6 +1024,12 @@ of suidperl. (F) The return statement was executed in mainline code, that is, where there was no subroutine call to return out of. See L<perlsub>. +=item Can't return %s from lvalue subroutine + +(F) Perl detected an attempt to return illegal lvalues (such +as temporary or readonly values) from a subroutine used as an lvalue. +This is not allowed. + =item Can't stat script "%s" (P) For some reason you can't fstat() the script even though you have @@ -889,7 +1086,7 @@ provide symbolic names for C<$!> errno values. =item Can't use "my %s" in sort comparison (F) The global variables $a and $b are reserved for sort comparisons. -You mentioned $a or $b in the same line as the E<lt>=E<gt> or cmp operator, +You mentioned $a or $b in the same line as the <=> or cmp operator, and the variable had earlier been declared as a lexical variable. Either qualify the sort variable with the package name, or rename the lexical variable. @@ -904,15 +1101,15 @@ lexical variable. reference of the type needed. You can use the ref() function to test the type of the reference, if need be. -=item Can't use \1 to mean $1 in expression +=item Can't use \%c to mean $%c in expression -(W) In an ordinary expression, backslash is a unary operator that creates +(W syntax) In an ordinary expression, backslash is a unary operator that creates a reference to its argument. The use of backslash to indicate a backreference to a matched substring is valid only as part of a regular expression pattern. Trying to do this in ordinary Perl code produces a value that prints out looking like SCALAR(0xdecaf). Use the $1 form instead. 
-=item Can't use bareword ("%s") as %s ref while \"strict refs\" in use +=item Can't use bareword ("%s") as %s ref while "strict refs" in use (F) Only hard references are allowed by "strict refs". Symbolic references are disallowed. See L<perlref>. @@ -941,59 +1138,69 @@ weren't. subscript. But to the left of the brackets was an expression that didn't look like an array reference, or anything else subscriptable. +=item Can't weaken a nonreference + +(F) You attempted to weaken something that was not a reference. Only +references can be weakened. + =item Can't x= to read-only value (F) You tried to repeat a constant value (often the undefined value) with an assignment operator, which implies modifying the value itself. Perhaps you need to copy the value to a temporary, and repeat that. -=item Cannot find an opnumber for "%s" +=item Can't find an opnumber for "%s" (F) A string of a form C<CORE::word> was given to prototype(), but there is no builtin with the name C<word>. -=item Cannot resolve method `%s' overloading `%s' in package `%s' +=item Can't resolve method `%s' overloading `%s' in package `%s' (F|P) Error resolving overloading specified by a method name (as opposed to a subroutine reference): no such method callable via the package. If method name is C<???>, this is an internal error. +=item Character class [:%s:] unknown + +(F) The class in the character class [: :] syntax is unknown. +See L<perlre>. + +=item Character class syntax [%s] belongs inside character classes + +(W unsafe) The character class constructs [: :], [= =], and [. .] go +I<inside> character classes, the [] are part of the construct, +for example: /[012[:alpha:]345]/. Note that [= =] and [. .] +are not currently implemented; they are simply placeholders for +future extensions. + =item Character class syntax [. .] 
is reserved for future extensions -(W) Within regular expression character classes ([]) the syntax beginning +(W regexp) Within regular expression character classes ([]) the syntax beginning with "[." and ending with ".]" is reserved for future extensions. If you need to represent those character sequences inside a regular expression character class, just quote the square brackets with the backslash: "\[." and ".\]". -=item Character class syntax [: :] is reserved for future extensions - -(W) Within regular expression character classes ([]) the syntax beginning -with "[:" and ending with ":]" is reserved for future extensions. -If you need to represent those character sequences inside a regular -expression character class, just quote the square brackets with the -backslash: "\[:" and ":\]". - =item Character class syntax [= =] is reserved for future extensions -(W) Within regular expression character classes ([]) the syntax +(W regexp) Within regular expression character classes ([]) the syntax beginning with "[=" and ending with "=]" is reserved for future extensions. If you need to represent those character sequences inside a regular expression character class, just quote the square brackets with the backslash: "\[=" and "=\]". -=item chmod: mode argument is missing initial 0 +=item chmod() mode argument is missing initial 0 -(W) A novice will sometimes say +(W chmod) A novice will sometimes say chmod 777, $filename not realizing that 777 will be interpreted as a decimal number, equivalent to 01411. Octal constants are introduced with a leading 0 in Perl, as in C. -=item Close on unopened file E<lt>%sE<gt> +=item Close on unopened file <%s> -(W) You tried to close a filehandle that was never opened. +(W unopened) You tried to close a filehandle that was never opened. =item Compilation failed in require @@ -1003,7 +1210,7 @@ were severe enough to halt compilation immediately. 
=item Complex regular subexpression recursion limit (%d) exceeded -(W) The regular expression engine uses recursion in complex situations +(W regexp) The regular expression engine uses recursion in complex situations where back-tracking is required. Recursion depth is limited to 32766, or perhaps less in architectures where the stack cannot grow arbitrarily. ("Simple" and "medium" situations are handled without @@ -1013,9 +1220,9 @@ than in the regular expression engine; or rewriting the regular expression so that it is simpler or backtracks less. (See L<perlbook> for information on I<Mastering Regular Expressions>.) -=item connect() on closed fd +=item connect() on closed socket %s -(W) You tried to do a connect on a closed socket. Did you forget to check +(W closed) You tried to do a connect on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/connect>. =item Constant is not %s reference @@ -1028,20 +1235,31 @@ See L<perlsub/"Constant Functions"> and L<constant>. =item Constant subroutine %s redefined -(S) You redefined a subroutine which had previously been eligible for +(S|W redefine) You redefined a subroutine which had previously been eligible for inlining. See L<perlsub/"Constant Functions"> for commentary and workarounds. =item Constant subroutine %s undefined -(S) You undefined a subroutine which had previously been eligible for +(W misc) You undefined a subroutine which had previously been eligible for inlining. See L<perlsub/"Constant Functions"> for commentary and workarounds. +=item constant(%s): %s + +(F) The parser found inconsistencies either while attempting to define an +overloaded constant, or when trying to find the character name specified +in the C<\N{...}> escape. Perhaps you forgot to load the corresponding +C<overload> or C<charnames> pragma? See L<charnames> and L<overload>. + =item Copy method did not return a reference (F) The method which overloads "=" is buggy. 
See L<overload/Copy Constructor>. +=item CORE::%s is not a keyword + +(F) The CORE:: namespace is reserved for Perl keywords. + =item Corrupt malloc ptr 0x%lx at 0x%lx (P) The malloc package that comes with Perl had an internal failure. @@ -1058,22 +1276,43 @@ a valid magic number. =item Deep recursion on subroutine "%s" -(W) This subroutine has called itself (directly or indirectly) 100 +(W recursion) This subroutine has called itself (directly or indirectly) 100 times more than it has returned. This probably indicates an infinite recursion, unless you're writing strange benchmark programs, in which case it indicates something else. +=item defined(@array) is deprecated + +(D deprecated) defined() is not usually useful on arrays because it checks for an +undefined I<scalar> value. If you want to see if the array is empty, +just use C<if (@array) { # not empty }> for example. + +=item defined(%hash) is deprecated + +(D deprecated) defined() is not usually useful on hashes because it checks for an +undefined I<scalar> value. If you want to see if the hash is empty, +just use C<if (%hash) { # not empty }> for example. + =item Delimiter for here document is too long -(F) In a here document construct like C<E<lt>E<lt>FOO>, the label +(F) In a here document construct like C<<<FOO>, the label C<FOO> is too long for Perl to handle. You have to be seriously twisted to write code that triggers this error. -=item Did you mean &%s instead? +=item Did not produce a valid header + +See Server error. + +=item (Did you mean &%s instead?) (W) You probably referred to an imported subroutine &FOO as $FOO or some such. -=item Did you mean $ or @ instead of %? +=item (Did you mean "local" instead of "our"?) + +(W misc) Remember that "our" does not localize the declared global variable. +You have declared it again in the same lexical scope, which seems superfluous. + +=item (Did you mean $ or @ instead of %?) (W) You probably said %hash{$key} when you meant $hash{$key} or @hash{@keys}. 
On the other hand, maybe you just meant %hash and got carried away. @@ -1083,7 +1322,7 @@ On the other hand, maybe you just meant %hash and got carried away. (F) You passed die() an empty string (the equivalent of C<die "">) or you called it with no args and both C<$@> and C<$_> were empty. -=item Do you need to predeclare %s? +=item (Do you need to predeclare %s?) (S) This is an educated guess made in conjunction with the message "%s found where operator expected". It often means a subroutine or module @@ -1095,6 +1334,10 @@ to define the subroutine or package before the current location. You can use an empty "sub foo;" or "package FOO;" to enter a "forward" declaration. +=item Document contains no data + +See Server error. + =item Don't know how to handle magic of type '%s' (P) The internal handling of magical variables has been cursed. @@ -1105,7 +1348,7 @@ declaration. =item Duplicate free() ignored -(S) An internal routine called free() on something that had already +(S malloc) An internal routine called free() on something that had already been freed. =item elseif should be elsif @@ -1115,10 +1358,16 @@ ugly. Your code will be interpreted as an attempt to call a method named "elseif" for the class returned by the following block. This is unlikely to be what you want. -=item END failed--cleanup aborted +=item %s failed--call queue aborted + +(F) An untrapped exception was raised while executing a CHECK, INIT, or +END subroutine. Processing of the remainder of the queue of such +routines has been prematurely ended. + +=item entering effective %s failed -(F) An untrapped exception was raised while executing an END subroutine. -The interpreter is immediately exited. +(F) While under the C<use filetest> pragma, switching the real and +effective uids or gids failed. =item Error converting file specification %s @@ -1162,32 +1411,44 @@ variable and glob that. 
=item Exiting eval via %s -(W) You are exiting an eval by unconventional means, such as +(W exiting) You are exiting an eval by unconventional means, such as +a goto, or a loop control statement. + +=item Exiting format via %s + +(W exiting) You are exiting a format by unconventional means, such as a goto, or a loop control statement. =item Exiting pseudo-block via %s -(W) You are exiting a rather special block construct (like a sort block or +(W exiting) You are exiting a rather special block construct (like a sort block or subroutine) by unconventional means, such as a goto, or a loop control statement. See L<perlfunc/sort>. =item Exiting subroutine via %s -(W) You are exiting a subroutine by unconventional means, such as +(W exiting) You are exiting a subroutine by unconventional means, such as a goto, or a loop control statement. =item Exiting substitution via %s -(W) You are exiting a substitution by unconventional means, such as +(W exiting) You are exiting a substitution by unconventional means, such as a return, a goto, or a loop control statement. =item Explicit blessing to '' (assuming package main) -(W) You are blessing a reference to a zero length string. This has +(W misc) You are blessing a reference to a zero length string. This has the effect of blessing the reference into the package main. This is usually not what you want. Consider providing a default target package, e.g. bless($ref, $p || 'MyPackage'); +=item false [] range "%s" in regexp + +(W regexp) A character class range must start and end at a literal character, not +another character class like C<\d> or C<[:alpha:]>. The "-" in your false +range is interpreted as a literal "-". Consider quoting the "-", "\-". +See L<perlre>. + =item Fatal VMS error at %s, line %d (P) An error peculiar to VMS. Something untoward happened in a VMS system @@ -1202,24 +1463,24 @@ PDP-11 or something? =item Filehandle %s never opened -(W) An I/O operation was attempted on a filehandle that was never initialized.
+(W unopened) An I/O operation was attempted on a filehandle that was never initialized. You need to do an open() or a socket() call, or call a constructor from the FileHandle package. -=item Filehandle %s opened for only input +=item Filehandle %s opened only for input -(W) You tried to write on a read-only filehandle. If you +(W io) You tried to write on a read-only filehandle. If you intended it to be a read-write filehandle, you needed to open it with -"+E<lt>" or "+E<gt>" or "+E<gt>E<gt>" instead of with "E<lt>" or nothing. If -you intended only to write the file, use "E<gt>" or "E<gt>E<gt>". See +"+<" or "+>" or "+>>" instead of with "<" or nothing. If +you intended only to write the file, use ">" or ">>". See L<perlfunc/open>. -=item Filehandle opened for only input +=item Filehandle %s opened only for output -(W) You tried to write on a read-only filehandle. If you -intended it to be a read-write filehandle, you needed to open it with -"+E<lt>" or "+E<gt>" or "+E<gt>E<gt>" instead of with "E<lt>" or nothing. If -you intended only to write the file, use "E<gt>" or "E<gt>E<gt>". See +(W io) You tried to read from a filehandle opened only for writing. If you +intended it to be a read/write filehandle, you needed to open it with +"+<" or "+>" or "+>>" instead of with "<" or nothing. If +you intended only to read from the file, use "<". See L<perlfunc/open>. =item Final $ should be \$ or $name @@ -1236,12 +1497,18 @@ a literal "at" sign, or was meant to introduce a variable name that happens to be missing. So you have to put either the backslash or the name. +=item flock() on closed filehandle %s + +(W closed) The filehandle you're attempting to flock() got itself closed some +time before now. Check your logic flow. flock() operates on filehandles. +Are you attempting to call flock() on a dirhandle by the same name? + =item Format %s redefined -(W) You redefined a format. To suppress this warning, say +(W redefine) You redefined a format. 
To suppress this warning, say { - local $^W = 0; + no warnings; eval "format NAME =..."; } @@ -1252,7 +1519,7 @@ to the end of your file without finding such a line. =item Found = in conditional, should be == -(W) You said +(W syntax) You said if ($foo = 123) @@ -1272,9 +1539,9 @@ when you meant because if it did, it'd feel morally obligated to return every hostname on the Internet. -=item get{sock,peer}name() on closed fd +=item get%sname() on closed socket %s -(W) You tried to get a socket or peer socket name on a closed socket. +(W closed) You tried to get a socket or peer socket name on a closed socket. Did you forget to check the return value of your socket() call? =item getpwnam returned invalid UIC %#o for user "%s" @@ -1282,6 +1549,20 @@ Did you forget to check the return value of your socket() call? (S) A warning peculiar to VMS. The call to C<sys$getuai> underlying the C<getpwnam> operator returned an invalid UIC. +=item glob failed (%s) + +(W glob) Something went wrong with the external program(s) used for C<glob> +and C<< <*.c> >>. Usually, this means that you supplied a C<glob> +pattern that caused the external program to fail and exit with a nonzero +status. If the message indicates that the abnormal exit resulted in a +coredump, this may also mean that your csh (C shell) is broken. If so, +you should change all of the csh-related variables in config.sh: If you +have tcsh, make the variables refer to it as if it were csh (e.g. +C<full_csh='/usr/bin/tcsh'>); otherwise, make them all empty (except that +C<d_csh> should be C<'undef'>) so that Perl will think csh is missing. +In either case, after editing config.sh, run C<./Configure -S> and +rebuild Perl. + =item Glob not terminated (F) The lexer saw a left angle bracket in a place where it was expecting @@ -1292,8 +1573,9 @@ the line, and you really meant a "less than". 
=item Global symbol "%s" requires explicit package name (F) You've said "use strict vars", which indicates that all variables -must either be lexically scoped (using "my"), or explicitly qualified to -say which package the global variable is in (using "::"). +must either be lexically scoped (using "my"), declared beforehand using +"our", or explicitly qualified to say which package the global variable +is in (using "::"). =item goto must have label @@ -1302,15 +1584,21 @@ unspecified destination. See L<perlfunc/goto>. =item Had to create %s unexpectedly -(S) A routine asked for a symbol from a symbol table that ought to have +(S internal) A routine asked for a symbol from a symbol table that ought to have existed already, but for some reason it didn't, and had to be created on an emergency basis to prevent a core dump. =item Hash %%s missing the % in argument %d of %s() -(D) Really old Perl let you omit the % on hash names in some spots. This +(D deprecated) Really old Perl let you omit the % on hash names in some spots. This is now heavily deprecated. +=item Hexadecimal number > 0xffffffff non-portable + +(W portable) The hexadecimal number you specified is larger than 2**32-1 +(4294967295) and therefore non-portable between systems. See +L<perlport> for more on portability concerns. + =item Identifier too long (F) Perl limits identifiers (names for variables, functions, etc.) to @@ -1318,33 +1606,26 @@ about 250 characters for simple names, and somewhat more for compound names (like C<$A::B>). You've exceeded Perl's limits. Future versions of Perl are likely to eliminate these arbitrary limitations. -=item Ill-formed logical name |%s| in prime_env_iter +=item Ill-formed CRTL environ value "%s" -(W) A warning peculiar to VMS. A logical name was encountered when preparing -to iterate over %ENV which violates the syntactic rules governing logical -names. Because it cannot be translated normally, it is skipped, and will not -appear in %ENV. 
This may be a benign occurrence, as some software packages -might directly modify logical name tables and introduce nonstandard names, -or it may indicate that a logical name table has been corrupted. +(W internal) A warning peculiar to VMS. Perl tried to read the CRTL's internal +environ array, and encountered an element without the C<=> delimiter +used to separate keys from values. The element is ignored. -=item Illegal character %s (carriage return) - -(F) A carriage return character was found in the input. This is an -error, and not a warning, because carriage return characters can break -multi-line strings, including here documents (e.g., C<print E<lt>E<lt>EOF;>). +=item Ill-formed message in prime_env_iter: |%s| -Under Unix, this error is usually caused by executing Perl code -- -either the main program, a module, or an eval'd string -- that was -transferred over a network connection from a non-Unix system without -properly converting the text file format. +(W internal) A warning peculiar to VMS. Perl tried to read a logical name +or CLI symbol definition when preparing to iterate over %ENV, and +didn't see the expected delimiter between key and value, so the +line was ignored. -Under systems that use something other than '\n' to delimit lines of -text, this error can also be caused by reading Perl code from a file -handle that is in binary mode (as set by the C<binmode> operator). +=item Illegal character %s (carriage return) -In either case, the Perl code in question will probably need to be -converted with something like C<s/\x0D\x0A?/\n/g> before it can be -executed. +(F) Perl normally treats carriage returns in the program text as it +would any other whitespace, which means you should never see this +error when Perl was built using standard options. For some reason, +your version of Perl appears to have been built without this support. +Talk to your Perl administrator.
=item Illegal division by zero @@ -1356,21 +1637,35 @@ logic, or you need to put a conditional in to guard against meaningless input. (F) You tried to divide a number by 0 to get the remainder. Most numbers don't take to this kindly. -=item Illegal octal digit +=item Illegal binary digit %s + +(F) You used a digit other than 0 or 1 in a binary number. + +=item Illegal octal digit %s (F) You used an 8 or 9 in an octal number. -=item Illegal octal digit ignored +=item Illegal binary digit %s ignored + +(W digit) You may have tried to use a digit other than 0 or 1 in a binary number. +Interpretation of the binary number stopped before the offending digit. + +=item Illegal octal digit %s ignored -(W) You may have tried to use an 8 or 9 in a octal number. Interpretation +(W digit) You may have tried to use an 8 or 9 in an octal number. Interpretation of the octal number stopped before the 8 or 9. -=item Illegal hex digit ignored +=item Illegal hexadecimal digit %s ignored -(W) You may have tried to use a character other than 0 - 9 or A - F in a -hexadecimal number. Interpretation of the hexadecimal number stopped +(W digit) You may have tried to use a character other than 0 - 9 or A - F, a - f +in a hexadecimal number. Interpretation of the hexadecimal number stopped before the illegal character. +=item Illegal number of bits in vec + +(F) The number of bits in vec() (the third argument) must be a power of +two from 1 to 32 (or 64, if your platform supports that). + =item Illegal switch in PERL5OPT: %s (X) The PERL5OPT environment variable may only be used to set the @@ -1410,17 +1705,17 @@ C<$ENV{ENV}> or C<$ENV{BASH_ENV}> are derived from data supplied (or potentially supplied) by the user. The script must set the path to a known value, using trustworthy data. See L<perlsec>. -=item Integer overflow in hex number - -(S) The literal hex number you have specified is too big for your -architecture. On a 32-bit architecture the largest hex literal is -0xFFFFFFFF.
- -=item Integer overflow in octal number +=item Integer overflow in %s number -(S) The literal octal number you have specified is too big for your -architecture. On a 32-bit architecture the largest octal literal is -037777777777. +(W overflow) The hexadecimal, octal or binary number you have specified either +as a literal or as an argument to hex() or oct() is too big for your +architecture, and has been converted to a floating point number. On a +32-bit architecture the largest hexadecimal, octal or binary number +representable without overflow is 0xFFFFFFFF, 037777777777, or +0b11111111111111111111111111111111 respectively. Note that Perl +transparently promotes all numbers to a floating point representation +internally--subject to loss of precision errors in subsequent +operations. =item Internal inconsistency in tracking vforks @@ -1436,44 +1731,47 @@ and execute the specified command. (P) Something went badly wrong in the regular expression parser. -=item glob failed (%s) - -(W) Something went wrong with the external program(s) used for C<glob> -and C<E<lt>*.cE<gt>>. Usually, this means that you supplied a C<glob> -pattern that caused the external program to fail and exit with a nonzero -status. If the message indicates that the abnormal exit resulted in a -coredump, this may also mean that your csh (C shell) is broken. If so, -you should change all of the csh-related variables in config.sh: If you -have tcsh, make the variables refer to it as if it were csh (e.g. -C<full_csh='/usr/bin/tcsh'>); otherwise, make them all empty (except that -C<d_csh> should be C<'undef'>) so that Perl will think csh is missing. -In either case, after editing config.sh, run C<./Configure -S> and -rebuild Perl. - =item internal urp in regexp at /%s/ (P) Something went badly awry in the regular expression parser. 
-=item invalid [] range in regexp +=item Invalid %s attribute: %s + +The indicated attribute for a subroutine or variable was not recognized +by Perl or by a user-supplied handler. See L<attributes>. + +=item Invalid %s attributes: %s + +The indicated attributes for a subroutine or variable were not recognized +by Perl or by a user-supplied handler. See L<attributes>. + +=item invalid [] range "%s" in regexp (F) The range specified in a character class had a minimum character greater than the maximum character. See L<perlre>. =item Invalid conversion in %s: "%s" -(W) Perl does not understand the given format conversion. +(W printf) Perl does not understand the given format conversion. See L<perlfunc/sprintf>. +=item Invalid separator character %s in attribute list + +(F) Something other than a colon or whitespace was seen between the +elements of an attribute list. If the previous attribute +had a parenthesised parameter list, perhaps that list was terminated +too soon. See L<attributes>. + =item Invalid type in pack: '%s' (F) The given character is not a valid pack type. See L<perlfunc/pack>. -(W) The given character is not a valid pack type but used to be silently +(W pack) The given character is not a valid pack type but used to be silently ignored. =item Invalid type in unpack: '%s' (F) The given character is not a valid unpack type. See L<perlfunc/unpack>. -(W) The given character is not a valid unpack type but used to be silently +(W unpack) The given character is not a valid unpack type but used to be silently ignored. =item ioctl is not implemented @@ -1503,16 +1801,31 @@ L<perlfunc/last>. that name, not even if you count where you were called from. See L<perlfunc/last>. -=item listen() on closed fd +=item leaving effective %s failed + +(F) While under the C<use filetest> pragma, switching the real and +effective uids or gids failed. -(W) You tried to do a listen on a closed socket. 
Did you forget to check +=item listen() on closed socket %s + +(W closed) You tried to do a listen on a closed socket. Did you forget to check the return value of your socket() call? See L<perlfunc/listen>. +=item Lvalue subs returning %s not implemented yet + +(F) Due to limitations in the current implementation, array and hash +values cannot be returned in subroutines used in lvalue context. +See L<perlsub/"Lvalue subroutines">. + =item Method for operation %s not found in package %s during blessing (F) An attempt was made to specify an entry in an overloading table that doesn't resolve to a valid subroutine. See L<overload>. +=item Method %s not permitted + +See Server error. + =item Might be a runaway multi-line %s string starting on line %d (S) An advisory indicating that the previous error may have been caused @@ -1521,7 +1834,7 @@ ended earlier on the current line. =item Misplaced _ in number -(W) An underline in a decimal constant wasn't on a 3-digit boundary. +(W syntax) An underline in a decimal constant wasn't on a 3-digit boundary. =item Missing $ on loop variable @@ -1529,21 +1842,31 @@ ended earlier on the current line. mentioned with the $ in Perl, unlike in the shells, where it can vary from one line to the next. +=item Missing %sbrace%s on \N{} + +(F) Wrong syntax of character name literal C<\N{charname}> within +double-quotish context. + =item Missing comma after first argument to %s function (F) While certain functions allow you to specify a filehandle or an "indirect object" before the argument list, this ain't one of them. -=item Missing operator before %s? +=item Missing command in piped open + +(W pipe) You used the C<open(FH, "| command")> or C<open(FH, "command |")> +construction, but the command was missing or blank. + +=item (Missing operator before %s?) (S) This is an educated guess made in conjunction with the message "%s found where operator expected". Often the missing operator is a comma. 
-=item Missing right bracket +=item Missing right curly or square bracket -(F) The lexer counted more opening curly brackets (braces) than closing ones. -As a general rule, you'll find it's missing near the place you were last -editing. +(F) The lexer counted more opening curly or square brackets than +closing ones. As a general rule, you'll find it's missing near the place +you were last editing. =item Modification of a read-only value attempted @@ -1577,15 +1900,20 @@ be created for some peculiar reason. =item Multidimensional syntax %s not supported -(W) Multidimensional arrays aren't written like C<$foo[1,2,3]>. They're written +(W syntax) Multidimensional arrays aren't written like C<$foo[1,2,3]>. They're written like C<$foo[1][2][3]>, as in C. +=item Missing name in "my sub" + +(F) The reserved syntax for lexically scoped subroutines requires that they +have a name with which they can be found. + =item Name "%s::%s" used only once: possible typo -(W) Typographical errors often show up as unique variable names. +(W once) Typographical errors often show up as unique variable names. If you had a good reason for having a unique name, then just mention -it again somehow to suppress the message. The C<use vars> pragma is -provided for just this purpose. +it again somehow to suppress the message. The C<our> declaration is +provided for this purpose. =item Negative length @@ -1616,6 +1944,11 @@ See L<perlsec>. (F) A setuid script can't be specified by the user. +=item No %s specified for -%c + +(F) The indicated command line switch needs a mandatory argument, but +you haven't specified one. + =item No comma allowed after %s (F) A list operator that has a filehandle or "indirect object" is not @@ -1661,30 +1994,36 @@ but for some reason the perl5db.pl file (or some facsimile thereof) didn't define a DB::sub routine to be called at the beginning of each ordinary subroutine call. 
-=item No error file after 2E<gt> or 2E<gt>E<gt> on command line +=item No error file after 2> or 2>> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a '2E<gt>' or a '2E<gt>E<gt>' on the command line, but can't find +and found a '2>' or a '2>>' on the command line, but can't find the name of the file to which to write data destined for stderr. -=item No input file after E<lt> on command line +=item No input file after < on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a 'E<lt>' on the command line, but can't find the name of the file +and found a '<' on the command line, but can't find the name of the file from which to read data for stdin. -=item No output file after E<gt> on command line +=item No output file after > on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a lone 'E<gt>' at the end of the command line, so it doesn't know +and found a lone '>' at the end of the command line, so it doesn't know where you wanted to redirect stdout. -=item No output file after E<gt> or E<gt>E<gt> on command line +=item No output file after > or >> on command line (F) An error peculiar to VMS. Perl handles its own command line redirection, -and found a 'E<gt>' or a 'E<gt>E<gt>' on the command line, but can't find the +and found a '>' or a '>>' on the command line, but can't find the name of the file to which to write data destined for stdout. +=item No package name allowed for variable %s in "our" + +(F) Fully qualified variable names are not allowed in "our" declarations, +because that doesn't make much sense under existing semantics. Such +syntax is reserved for future extensions. + =item No Perl script found in input (F) You called C<perl -x>, but no line was found in the file beginning @@ -1700,18 +2039,18 @@ your system. (F) Configure didn't find anything resembling the setreuid() call for your system. 
-=item No space allowed after B<-I> +=item No space allowed after -%c -(F) The argument to B<-I> must follow the B<-I> immediately with no -intervening space. +(F) The argument to the indicated command line switch must follow immediately +after the switch, without intervening spaces. -=item No such array field +=item No such pseudo-hash field "%s" (F) You tried to access an array as a hash, but the field name used is not defined. The hash at index 0 should map all valid field names to array indices for that to work. -=item No such field "%s" in variable %s of type %s +=item No such pseudo-hash field "%s" in variable %s of type %s (F) You tried to access a field of a typed variable where the type does not know about the field name. The field names are looked up in @@ -1726,9 +2065,17 @@ an attempt to close an unopened filehandle. =item No such signal: SIG%s -(W) You specified a signal name as a subscript to %SIG that was not recognized. +(W signal) You specified a signal name as a subscript to %SIG that was not recognized. Say C<kill -l> in your shell to see the valid signal names on your system. +=item no UTC offset information; assuming local time is UTC + +(S) A warning peculiar to VMS. Perl was unable to find the local +timezone offset, so it's assuming that local system time is equivalent +to UTC. If it's not, define the logical name F<SYS$TIMEZONE_DIFFERENTIAL> +to translate to the number of seconds which need to be added to UTC to +get local time. + =item Not a CODE reference (F) Perl was trying to evaluate a reference to a code value (that is, a @@ -1790,7 +2137,7 @@ function to find out what kind of ref it really was. See L<perlref>. =item Not enough format arguments -(W) A format specified more picture fields than the next line supplied. +(W syntax) A format specified more picture fields than the next line supplied. See L<perlform>. =item Null filename used @@ -1806,7 +2153,7 @@ supplied it an uninitialized value. See L<perlform>. 
=item NULL OP IN RUN -(P) Some internal routine called run() with a null opcode pointer. +(P debugging) Some internal routine called run() with a null opcode pointer. =item Null realloc @@ -1827,9 +2174,23 @@ about 250 characters. You've exceeded that length. Future versions of Perl are likely to eliminate this arbitrary limitation. In the meantime, try using scientific notation (e.g. "1e6" instead of "1_000_000"). +=item Octal number > 037777777777 non-portable + +(W portable) The octal number you specified is larger than 2**32-1 (4294967295) +and therefore non-portable between systems. See L<perlport> for more +on portability concerns. + +See also L<perlport> for writing portable code. + +=item Octal number in vector unsupported + +(F) Numbers with a leading C<0> are not currently allowed in vectors. The +octal number interpretation of such numbers may be supported in a future +version. + =item Odd number of elements in hash assignment -(S) You specified an odd number of elements to initialize a hash, which +(W misc) You specified an odd number of elements to initialize a hash, which is odd, because hashes come in key/value pairs. =item Offset outside string @@ -1841,11 +2202,11 @@ will extend the buffer and zero pad the new area. =item oops: oopsAV -(S) An internal warning that the grammar is screwed up. +(S internal) An internal warning that the grammar is screwed up. =item oops: oopsHV -(S) An internal warning that the grammar is screwed up. +(S internal) An internal warning that the grammar is screwed up. =item Operation `%s': no method found, %s @@ -1857,12 +2218,18 @@ true. See L<overload>. =item Operator or semicolon missing before %s -(S) You used a variable or subroutine call where the parser was +(S ambiguous) You used a variable or subroutine call where the parser was expecting an operator. The parser has assumed you really meant to use an operator, but this is highly likely to be incorrect. 
For example, if you say "*foo *foo" it will be interpreted as if you said "*foo * 'foo'". +=item Out of memory! + +(X) The malloc() function returned 0, indicating there was insufficient +remaining memory (or virtual memory) to satisfy the request. Perl +has no option but to exit immediately. + =item Out of memory for yacc stack (F) The yacc parser wanted to grow its stack so it could continue parsing, @@ -1894,7 +2261,7 @@ instead of C<$arr[$time]>. =item page overflow -(W) A single call to write() produced more lines than can fit on a page. +(W io) A single call to write() produced more lines than can fit on a page. See L<perlform>. =item panic: ck_grep @@ -1910,6 +2277,11 @@ See L<perlform>. (P) The savestack was requested to restore more localized values than there are in the savestack. +=item panic: del_backref + +(P) Failed an internal consistency check while trying to reset a weak +reference. + =item panic: die %s (P) We popped the context stack to an eval context, and then discovered @@ -1948,6 +2320,10 @@ and then discovered it wasn't a context we know how to do a goto in. (P) The lexer got into a bad state parsing a string with brackets. +=item panic: kid popen errno read + +(F) forked child returned an incomprehensible message about its errno. + =item panic: last (P) We popped the context stack to a block context, and then discovered @@ -1966,6 +2342,11 @@ invalid enum on the top of it. (P) Something requested a negative number of bytes of malloc. +=item panic: magic_killbackrefs + +(P) Failed an internal consistency check while trying to reset all weak +references to an object. + =item panic: mapstart (P) The compiler is screwed up with respect to the map() function. @@ -2041,9 +2422,13 @@ was string. (P) The lexer got into a bad state while processing a case modifier. +=item panic: %s + +(P) An internal error. 
+ =item Parentheses missing around "%s" list -(W) You said something like +(W parenthesis) You said something like my $foo, $bar = @_; @@ -2051,7 +2436,7 @@ when you meant my ($foo, $bar) = @_; -Remember that "my" and "local" bind closer than comma. +Remember that "my", "our", and "local" bind tighter than comma. =item Perl %3.3f required--this is only version %s, stopped @@ -2063,20 +2448,25 @@ anyway? See L<perlfunc/require>. (F) The setuid emulator in suidperl decided you were up to no good. -=item pid %d not a child +=item pid %x not a child -(W) A warning peculiar to VMS. Waitpid() was asked to wait for a process which +(W exec) A warning peculiar to VMS. Waitpid() was asked to wait for a process which isn't a subprocess of the current process. While this is fine from VMS' perspective, it's probably not what you intended. =item POSIX getpgrp can't take an argument -(F) Your C compiler uses POSIX getpgrp(), which takes no argument, unlike +(F) Your system has POSIX getpgrp(), which takes no argument, unlike the BSD version, which takes a pid. +=item Possible Y2K bug: %s + +(W y2k) You are concatenating the number 19 with another number, which +could be a potential Year 2000 problem. + =item Possible attempt to put comments in qw() list -(W) qw() lists contain items separated by whitespace; as with literal +(W qw) qw() lists contain items separated by whitespace; as with literal strings, comment characters are not ignored, but are instead treated as literal data. (You may have used different delimiters than the parentheses shown here; braces are also frequently used.) @@ -2105,7 +2495,7 @@ old-fashioned way, with quotes and commas: =item Possible attempt to separate words with commas -(W) qw() lists contain items separated by whitespace; therefore commas +(W qw) qw() lists contain items separated by whitespace; therefore commas aren't needed to separate the items. 
(You may have used different delimiters than the parentheses shown here; braces are also frequently used.) @@ -2126,9 +2516,27 @@ Perl guesses a reasonable buffer size, but puts a sentinel byte at the end of the buffer just in case. This sentinel byte got clobbered, and Perl assumes that memory is now corrupted. See L<perlfunc/ioctl>. +=item pragma "attrs" is deprecated, use "sub NAME : ATTRS" instead + +(W deprecated) You have written something like this: + + sub doit + { + use attrs qw(locked); + } + +You should use the new declaration syntax instead. + + sub doit : locked + { + ... + +The C<use attrs> pragma is now obsolete, and is only provided for +backward-compatibility. See L<perlsub/"Subroutine Attributes">. + =item Precedence problem: open %s should be open(%s) -(S) The old irregular construct +(S precedence) The old irregular construct open FOO || die; @@ -2141,27 +2549,23 @@ and list operators. (The old open was a little of both.) You must put parentheses around the filehandle, or use the new "or" operator instead of "||". -=item print on closed filehandle %s +=item Premature end of script headers -(W) The filehandle you're printing on got itself closed sometime before now. -Check your logic flow. +See Server error. -=item printf on closed filehandle %s +=item print() on closed filehandle %s -(W) The filehandle you're writing to got itself closed sometime before now. +(W closed) The filehandle you're printing on got itself closed sometime before now. Check your logic flow. -=item Probable precedence problem on %s - -(W) The compiler found a bareword where it expected a conditional, -which often indicates that an || or && was parsed as part of the -last argument of the previous construct, for example: +=item printf() on closed filehandle %s - open FOO || die; +(W closed) The filehandle you're writing to got itself closed sometime before now. +Check your logic flow.
=item Prototype mismatch: %s vs %s -(S) The subroutine being declared or defined had previously been declared +(S unsafe) The subroutine being declared or defined had previously been declared or defined with a different function prototype. =item Range iterator outside integer range @@ -2171,18 +2575,23 @@ are outside the range which can be represented by integers internally. One possible workaround is to force Perl to use magical string increment by prepending "0" to your numbers. -=item Read on closed filehandle E<lt>%sE<gt> +=item readline() on closed filehandle %s -(W) The filehandle you're reading from got itself closed sometime before now. +(W closed) The filehandle you're reading from got itself closed sometime before now. Check your logic flow. +=item realloc() of freed memory ignored + +(S malloc) An internal routine called realloc() on something that had already +been freed. + =item Reallocation too large: %lx (F) You can't allocate more than 64K on an MS-DOS machine. =item Recompile perl with B<-D>DEBUGGING to use B<-D> switch -(F) You can't use the B<-D> option unless the code to produce the +(F debugging) You can't use the B<-D> option unless the code to produce the desired output is compiled into Perl, which entails some overhead, which is why it's currently left out of your copy. @@ -2198,7 +2607,7 @@ method. Probably indicates an unintended loop in your inheritance hierarchy. =item Reference found where even-sized list expected -(W) You gave a single reference where Perl was expecting a list with +(W misc) You gave a single reference where Perl was expecting a list with an even number of elements (for assignment to a hash). This usually means that you used the anon hash constructor when you meant to use parens. In any case, a hash requires key/value B<pairs>. @@ -2208,9 +2617,14 @@ to use parens. In any case, a hash requires key/value B<pairs>. 
%hash = ( one => 1, two => 2, ); # right %hash = qw( one 1 two 2 ); # also fine +=item Reference is already weak + +(W misc) You have attempted to weaken a reference that is already weak. +Doing so has no effect. + =item Reference miscount in sv_replace() -(W) The internal sv_replace() function was handed a new SV with a +(W internal) The internal sv_replace() function was handed a new SV with a reference count of other than 1. =item regexp *+ operand could be empty @@ -2227,17 +2641,19 @@ expression compiler gave it. (P) A "can't happen" error, because safemalloc() should have caught it earlier. -=item regexp too big +=item Repeat count in pack overflows -(F) The current implementation of regular expressions uses shorts as -address offsets within a string. Unfortunately this means that if -the regular expression compiles to longer than 32767, it'll blow up. -Usually when you want a regular expression this big, there is a better -way to do it with multiple statements. See L<perlre>. +(F) You can't specify a repeat count so large that it overflows +your signed integers. See L<perlfunc/pack>. + +=item Repeat count in unpack overflows + +(F) You can't specify a repeat count so large that it overflows +your signed integers. See L<perlfunc/unpack>. =item Reversed %s= operator -(W) You wrote your assignment operator backwards. The = must always +(W syntax) You wrote your assignment operator backwards. The = must always come last, to avoid ambiguity with subsequent unary operators. =item Runaway format @@ -2250,7 +2666,7 @@ shifting or popping (for array variables). See L<perlform>. =item Scalar value @%s[%s] better written as $%s[%s] -(W) You've used an array slice (indicated by @) to select a single element of +(W syntax) You've used an array slice (indicated by @) to select a single element of an array. Generally it's better to ask for a scalar value (indicated by $).
The difference is that C<$foo[&bar]> always behaves like a scalar, both when assigning to it and when evaluating its argument, while C<@foo[&bar]> behaves @@ -2264,7 +2680,7 @@ L<perlref>. =item Scalar value @%s{%s} better written as $%s{%s} -(W) You've used a hash slice (indicated by @) to select a single element of +(W syntax) You've used a hash slice (indicated by @) to select a single element of a hash. Generally it's better to ask for a scalar value (indicated by $). The difference is that C<$foo{&bar}> always behaves like a scalar, both when assigning to it and when evaluating its argument, while C<@foo{&bar}> behaves @@ -2289,7 +2705,7 @@ Missing the leading C<$> from a variable C<$m> may cause this error. =item %sseek() on unopened file -(W) You tried to use the seek() or sysseek() function on a filehandle that +(W unopened) You tried to use the seek() or sysseek() function on a filehandle that was either never opened or has since been closed. =item select not implemented @@ -2302,17 +2718,17 @@ was either never opened or has since been closed. =item semi-panic: attempt to dup freed string -(S) The internal newSVsv() routine was called to duplicate a scalar +(S internal) The internal newSVsv() routine was called to duplicate a scalar that had previously been marked as free. =item Semicolon seems to be missing -(W) A nearby syntax error was probably caused by a missing semicolon, +(W semicolon) A nearby syntax error was probably caused by a missing semicolon, or possibly some other missing operator, such as a comma. -=item Send on closed socket +=item send() on closed socket %s -(W) The filehandle you're sending to got itself closed sometime before now. +(W closed) The socket you're sending to got itself closed sometime before now. Check your logic flow. =item Sequence (? incomplete @@ -2337,7 +2753,12 @@ See L<perlre>. =item Server error -Also known as "500 Server error". 
+This is the error message generally seen in a browser window when trying +to run a CGI program (including SSI) over the web. The actual error +text varies widely from server to server. The most frequently-seen +variants are "500 Server error", "Method (something) not permitted", +"Document contains no data", "Premature end of script headers", and +"Did not produce a valid header". B<This is a CGI error, not a Perl error>. @@ -2364,10 +2785,15 @@ think so. =item seteuid() not implemented -(F) You tried to assign to C<$E<gt>>, and your operating system doesn't support +(F) You tried to assign to C<< $> >>, and your operating system doesn't support the seteuid() system call (or equivalent), or at least Configure didn't think so. +=item setpgrp can't take arguments + +(F) Your system has the setpgrp() from BSD 4.2, which takes no arguments, +unlike POSIX setpgid(), which takes a process ID and process group ID. + =item setrgid() not implemented (F) You tried to assign to C<$(>, and your operating system doesn't support @@ -2376,7 +2802,7 @@ think so. =item setruid() not implemented -(F) You tried to assign to C<$E<lt>>, and your operating system doesn't support +(F) You tried to assign to C<$<>, and your operating system doesn't support the setruid() system call (or equivalent), or at least Configure didn't think so. @@ -2389,13 +2815,13 @@ because the world might have written on it already. (F) You don't have System V shared memory IPC on your system. -=item shutdown() on closed fd +=item shutdown() on closed socket %s -(W) You tried to do a shutdown on a closed socket. Seems a bit superfluous. +(W closed) You tried to do a shutdown on a closed socket. Seems a bit superfluous. =item SIG%s handler "%s" not defined -(W) The signal handler named in %SIG doesn't, in fact, exist. Perhaps you +(W signal) The signal handler named in %SIG doesn't, in fact, exist. Perhaps you put it into the wrong package? 
=item sort is now a reserved word @@ -2406,7 +2832,7 @@ But before sort was a keyword, people sometimes used it as a filehandle. =item Sort subroutine didn't return a numeric value (F) A sort comparison routine must return a number. You probably blew -it by not using C<E<lt>=E<gt>> or C<cmp>, or by not using them correctly. +it by not using C<< <=> >> or C<cmp>, or by not using them correctly. See L<perlfunc/sort>. =item Sort subroutine didn't return single value @@ -2420,14 +2846,14 @@ or less than one element. See L<perlfunc/sort>. more times than there are characters of input, which is what happened.) See L<perlfunc/split>. -=item Stat on unopened file E<lt>%sE<gt> +=item Stat on unopened file <%s> -(W) You tried to use the stat() function (or an equivalent file test) +(W unopened) You tried to use the stat() function (or an equivalent file test) on a filehandle that was either never opened or has since been closed. =item Statement unlikely to be reached -(W) You did an exec() with some statement after it other than a die(). +(W exec) You did an exec() with some statement after it other than a die(). This is almost always an error, because exec() never returns unless there was a failure. You probably wanted to use system() instead, which does return. To suppress this warning, put the exec() in a block @@ -2435,7 +2861,7 @@ by itself. =item Strange *+?{} on zero-length expression -(W) You applied a regular expression quantifier in a place where it +(W regexp) You applied a regular expression quantifier in a place where it makes no sense, such as on a zero-width assertion. Try putting the quantifier inside the assertion instead. For example, the way to match "abc" provided that it is followed by three @@ -2449,10 +2875,10 @@ may break this. =item Subroutine %s redefined -(W) You redefined a subroutine. To suppress this warning, say +(W redefine) You redefined a subroutine. To suppress this warning, say { - local $^W = 0; + no warnings; eval "sub name { ... 
}"; } @@ -2477,10 +2903,10 @@ Missing the leading C<$> from variable C<$s> may cause this error. =item substr outside of string -(S),(W) You tried to reference a substr() that pointed outside of a +(W substr),(F) You tried to reference a substr() that pointed outside of a string. That is, the absolute value of the offset was larger than the length of the string. See L<perlfunc/substr>. This warning is -mandatory if substr is used in an lvalue context (as the left hand side +fatal if substr is used in an lvalue context (as the left hand side of an assignment or as a subroutine argument for example). =item suidperl is no longer needed since %s @@ -2488,6 +2914,11 @@ of an assignment or as a subroutine argument for example). (F) Your Perl was compiled with B<-D>SETUID_SCRIPTS_ARE_SECURE_NOW, but a version of the setuid emulator somehow got run anyway. +=item switching effective %s is not implemented + +(F) While under the C<use filetest> pragma, we cannot switch the +real and effective uids or gids. + =item syntax error (F) Probably means you had a syntax error. Common reasons include: @@ -2522,9 +2953,9 @@ into Perl yourself. machine. In some machines the functionality can exist but be unconfigured. Consult your system support. -=item Syswrite on closed filehandle +=item syswrite() on closed filehandle %s -(W) The filehandle you're writing to got itself closed sometime before now. +(W closed) The filehandle you're writing to got itself closed sometime before now. Check your logic flow. =item Target of goto is too deeply nested @@ -2534,12 +2965,12 @@ nested for Perl to reach. Perl is doing you a favor by refusing. =item tell() on unopened file -(W) You tried to use the tell() function on a filehandle that was either +(W unopened) You tried to use the tell() function on a filehandle that was either never opened or has since been closed. 
-=item Test on unopened file E<lt>%sE<gt> +=item Test on unopened file <%s> -(W) You tried to invoke a file test operator on a filehandle that isn't +(W unopened) You tried to invoke a file test operator on a filehandle that isn't open. Check your logic. See also L<perlfunc/-X>. =item That use of $[ is unsupported @@ -2576,6 +3007,17 @@ will deny it. if the last stat that wrote to the stat buffer already went past the symlink to get to the real file. Use an actual filename instead. +=item This Perl can't reset CRTL environ elements (%s) + +=item This Perl can't set CRTL environ elements (%s=%s) + +(W internal) Warnings peculiar to VMS. You tried to change or delete an element +of the CRTL's internal environ array, but your copy of Perl wasn't +built with a CRTL that contained the setenv() function. You'll need to +rebuild Perl with a CRTL that does, or redefine F<PERL_ENV_TABLES> (see +L<perlvms>) so that the environ array isn't the target of the change to +%ENV which produced the warning. + =item times not implemented (F) Your version of the C library apparently doesn't do times(). I suspect @@ -2608,6 +3050,14 @@ B<-T> option must appear on the command line: C<perl -T scriptname>. B<-M> or B<-m> option. This is an error because B<-M> and B<-m> options are not intended for use inside scripts. Use the C<use> pragma instead. +=item Too late to run %s block + +(W void) A CHECK or INIT block is being defined during run time proper, +when the opportunity to run them has already passed. Perhaps you are +loading a file with C<require> or C<do> when you should be using +C<use> instead. Or perhaps you should put the C<require> or C<do> +inside a BEGIN block. + =item Too many ('s =item Too many )'s @@ -2654,7 +3104,7 @@ certain type. Arrays must be @NAME or C<@{EXPR}>. Hashes must be =item umask: argument is missing initial 0 -(W) A umask of 222 is incorrect. It should be 0222, because octal +(W umask) A umask of 222 is incorrect. 
It should be 0222, because octal literals always start with 0 in Perl, as in C. =item umask not implemented @@ -2668,22 +3118,22 @@ to use it to restrict permissions for yourself (EXPR & 0700). =item Unbalanced context: %d more PUSHes than POPs -(W) The exit code detected an internal inconsistency in how many execution +(W internal) The exit code detected an internal inconsistency in how many execution contexts were entered and left. =item Unbalanced saves: %d more saves than restores -(W) The exit code detected an internal inconsistency in how many +(W internal) The exit code detected an internal inconsistency in how many values were temporarily localized. =item Unbalanced scopes: %d more ENTERs than LEAVEs -(W) The exit code detected an internal inconsistency in how many blocks +(W internal) The exit code detected an internal inconsistency in how many blocks were entered and left. =item Unbalanced tmps: %d more allocs than frees -(W) The exit code detected an internal inconsistency in how many mortal +(W internal) The exit code detected an internal inconsistency in how many mortal scalars were allocated and freed. =item Undefined format "%s" called @@ -2718,7 +3168,7 @@ another package? See L<perlform>. =item Undefined value assigned to typeglob -(W) An undefined value was assigned to a typeglob, a la C<*foo = undef>. +(W misc) An undefined value was assigned to a typeglob, a la C<*foo = undef>. This does nothing. It's possible that you really mean C<undef *foo>. =item unexec of %s into %s failed! @@ -2730,18 +3180,31 @@ representative, who probably put it there in the first place. (F) There are no byte-swapping functions for a machine with this byte order. +=item Unknown open() mode '%s' + +(F) The second argument of 3-argument open() is not among the list +of valid modes: C<< < >>, C<< > >>, C<<< >> >>>, C<< +< >>, +C<< +> >>, C<<< +>> >>>, C<-|>, C<|->. + +=item Unknown process %x sent message to prime_env_iter: %s + +(P) An error peculiar to VMS. 
Perl was reading values for %ENV before +iterating over it, and someone else stuck a message in the stream of +data Perl expected. Someone's very confused, or perhaps trying to +subvert Perl's population of %ENV for nefarious purposes. + =item unmatched () in regexp (F) Unbackslashed parentheses must always be balanced in regular expressions. If you're a vi user, the % key is valuable for finding the matching parenthesis. See L<perlre>. -=item Unmatched right bracket +=item Unmatched right %s bracket -(F) The lexer counted more closing curly brackets (braces) than opening -ones, so you're probably missing an opening bracket. As a general -rule, you'll find the missing one (so to speak) near the place you were -last editing. +(F) The lexer counted more closing curly or square brackets than +opening ones, so you're probably missing a matching opening bracket. +As a general rule, you'll find the missing one (so to speak) near the +place you were last editing. =item unmatched [] in regexp @@ -2751,7 +3214,7 @@ See L<perlre>. =item Unquoted string "%s" may clash with future reserved word -(W) You used a bareword that might someday be claimed as a reserved word. +(W reserved) You used a bareword that might someday be claimed as a reserved word. It's best to put such a word in quotes, or capitalize it somehow, or insert an underbar into it. You might also declare it as a subroutine. @@ -2761,6 +3224,11 @@ an underbar into it. You might also declare it as a subroutine. in your Perl script (or eval). Perhaps you tried to run a compressed script, a binary program, or a directory as a Perl program. +=item Unrecognized escape \\%c passed through + +(W misc) You used a backslash-character combination which is not recognized +by Perl. + =item Unrecognized signal name "%s" (F) You specified a signal name to the kill() function that was not recognized. @@ -2774,7 +3242,7 @@ supplying the bad switch on your behalf.) 
=item Unsuccessful %s on filename containing newline -(W) A file operation was attempted on a filename, and that operation +(W newline) A file operation was attempted on a filename, and that operation failed, PROBABLY because the filename contained a newline, PROBABLY because you forgot to chop() or chomp() it off. See L<perlfunc/chomp>. @@ -2800,33 +3268,35 @@ At least, Configure doesn't think so. (F) Your machine doesn't support the Berkeley socket mechanism, or at least that's what Configure thought. -=item Unterminated E<lt>E<gt> operator +=item Unterminated <> operator (F) The lexer saw a left angle bracket in a place where it was expecting a term, so it's looking for the corresponding right angle bracket, and not finding it. Chances are you left some needed parentheses out earlier in the line, and you really meant a "less than". -=item Use of "$$<digit>" to mean "${$}<digit>" is deprecated +=item Unterminated attribute parameter in attribute list -(D) Perl versions before 5.004 misinterpreted any type marker followed -by "$" and a digit. For example, "$$0" was incorrectly taken to mean -"${$}0" instead of "${$0}". This bug is (mostly) fixed in Perl 5.004. +(F) The lexer saw an opening (left) parenthesis character while parsing an +attribute list, but the matching closing (right) parenthesis +character was not found. You may need to add (or remove) a backslash +character to get your parentheses to balance. See L<attributes>. -However, the developers of Perl 5.004 could not fix this bug completely, -because at least two widely-used modules depend on the old meaning of -"$$0" in a string. So Perl 5.004 still interprets "$$<digit>" in the -old (broken) way inside strings; but it generates this message as a -warning. And in Perl 5.005, this special treatment will cease. +=item Unterminated attribute list + +(F) The lexer found something other than a simple identifier at the start +of an attribute, and it wasn't a semicolon or the start of a +block. 
Perhaps you terminated the parameter list of the previous attribute +too soon. See L<attributes>. =item Use of $# is deprecated -(D) This was an ill-advised attempt to emulate a poorly defined B<awk> feature. +(D deprecated) This was an ill-advised attempt to emulate a poorly defined B<awk> feature. Use an explicit printf() or sprintf() instead. =item Use of $* is deprecated -(D) This variable magically turned on multi-line pattern matching, both for +(D deprecated) This variable magically turned on multi-line pattern matching, both for you and for any luckless subroutine that you happen to call. You should use the new C<//m> and C<//s> modifiers now to do that without the dangerous action-at-a-distance effects of C<$*>. @@ -2836,23 +3306,23 @@ action-at-a-distance effects of C<$*>. (F) You attempted to use a feature of printf that is accessible from only C. This usually means there's a better way to do it in Perl. -=item Use of bare E<lt>E<lt> to mean E<lt>E<lt>"" is deprecated +=item Use of bare << to mean <<"" is deprecated -(D) You are now encouraged to use the explicitly quoted form if you +(D deprecated) You are now encouraged to use the explicitly quoted form if you wish to use an empty line as the terminator of the here-document. =item Use of implicit split to @_ is deprecated -(D) It makes a lot of work for the compiler when you clobber a +(D deprecated) It makes a lot of work for the compiler when you clobber a subroutine's argument list, so it's better if you assign the results of a split() explicitly to an array (or list). =item Use of inherited AUTOLOAD for non-method %s() is deprecated -(D) As an (ahem) accidental feature, C<AUTOLOAD> subroutines are looked -up as methods (using the C<@ISA> hierarchy) even when the subroutines to -be autoloaded were called as plain functions (e.g. C<Foo::bar()>), not -as methods (e.g. C<Foo-E<gt>bar()> or C<$obj-E<gt>bar()>). 
+(D deprecated) As an (ahem) accidental feature, C<AUTOLOAD> subroutines are +looked up as methods (using the C<@ISA> hierarchy) even when the subroutines +to be autoloaded were called as plain functions (e.g. C<Foo::bar()>), +not as methods (e.g. C<< Foo->bar() >> or C<< $obj->bar() >>). This bug will be rectified in Perl 5.005, which will use method lookup only for methods' C<AUTOLOAD>s. However, there is a significant base @@ -2871,7 +3341,7 @@ C<use AutoLoader 'AUTOLOAD';>. =item Use of reserved word "%s" is deprecated -(D) The indicated bareword is a reserved word. Future versions of perl +(D deprecated) The indicated bareword is a reserved word. Future versions of perl may use it as a keyword, so you're better off either explicitly quoting the word in a manner appropriate for its context of use, or using a different name altogether. The warning can be suppressed for subroutine @@ -2880,15 +3350,15 @@ e.g. C<&our()>, or C<Foo::our()>. =item Use of %s is deprecated -(D) The construct indicated is no longer recommended for use, generally +(D deprecated) The construct indicated is no longer recommended for use, generally because there's a better way to do it, and also because the old way has bad side effects. -=item Use of uninitialized value +=item Use of uninitialized value%s -(W) An undefined value was used as if it were already defined. It was +(W uninitialized) An undefined value was used as if it were already defined. It was interpreted as a "" or a 0, but maybe it was a mistake. To suppress this -warning assign an initial value to your variables. +warning assign a defined value to your variables. =item Useless use of "re" pragma @@ -2896,7 +3366,7 @@ warning assign an initial value to your variables. 
=item Useless use of %s in void context -(W) You did something without a side effect in a context that does nothing +(W void) You did something without a side effect in a context that does nothing with the return value, such as a statement that doesn't return a value from a block, or the left side of a scalar comma operator. Very often this points not to stupidity on your part, but a failure of Perl to parse @@ -2927,17 +3397,24 @@ L<perlref> for more on this. =item untie attempted while %d inner references still exist -(W) A copy of the object returned from C<tie> (or C<tied>) was still +(W untie) A copy of the object returned from C<tie> (or C<tied>) was still valid when C<untie> was called. =item Value of %s can be "0"; test with defined() -(W) In a conditional expression, you used <HANDLE>, <*> (glob), C<each()>, +(W misc) In a conditional expression, you used <HANDLE>, <*> (glob), C<each()>, or C<readdir()> as a boolean value. Each of these constructs can return a value of "0"; that would make the conditional expression false, which is probably not what you intended. When using these constructs in conditional expressions, test their values with the C<defined> operator. +=item Value of CLI symbol "%s" too long + +(W misc) A warning peculiar to VMS. Perl tried to read the value of an %ENV +element from a CLI symbol table, and found a resultant string longer +than 1024 characters. The return value has been truncated to 1024 +characters. + =item Variable "%s" is not imported%s (F) While "use strict" in effect, you referred to a global variable @@ -2948,7 +3425,7 @@ on the front of your variable. =item Variable "%s" may be unavailable -(W) An inner (nested) I<anonymous> subroutine is inside a I<named> +(W closure) An inner (nested) I<anonymous> subroutine is inside a I<named> subroutine, and outside that is another subroutine; and the anonymous (innermost) subroutine is referencing a lexical variable defined in the outermost subroutine. 
For example: @@ -2970,7 +3447,7 @@ subroutine in between interferes with this feature. =item Variable "%s" will not stay shared -(W) An inner (nested) I<named> subroutine is referencing a lexical +(W closure) An inner (nested) I<named> subroutine is referencing a lexical variable defined in an outer subroutine. When the inner subroutine is called, it will probably see the value of @@ -2996,6 +3473,12 @@ variables. of Perl. Check the #! line, or manually feed your script into Perl yourself. +=item Version number must be a constant number + +(P) The attempt to translate a C<use Module n.n LIST> statement into +its equivalent C<BEGIN> block found an internal inconsistency with +the version number. + =item perl: warning: Setting locale failed. (S) The whole warning message will look something like: @@ -3029,7 +3512,7 @@ close(). This usually indicates your file system ran out of disk space. =item Warning: Use of "%s" without parentheses is ambiguous -(S) You wrote a unary operator followed by something that looks like a +(S ambiguous) You wrote a unary operator followed by something that looks like a binary operator that could also have been interpreted as a term or unary operator. For instance, if you know that the rand function has a default argument of 1.0, and you write @@ -3046,9 +3529,9 @@ but in actual fact, you got So put in parentheses to say what you really mean. -=item Write on closed filehandle +=item write() on closed filehandle %s -(W) The filehandle you're writing to got itself closed sometime before now. +(W closed) The filehandle you're writing to got itself closed sometime before now. Check your logic flow. =item X outside of string @@ -3084,32 +3567,32 @@ the eg directory to put a setuid C wrapper around your script. =item You need to quote "%s" -(W) You assigned a bareword as a signal handler name. Unfortunately, you +(W syntax) You assigned a bareword as a signal handler name. 
Unfortunately, you already have a subroutine of that name declared, which means that Perl 5 will try to call the subroutine when the assignment is executed, which is probably not what you want. (If it IS what you want, put an & in front.) -=item [gs]etsockopt() on closed fd +=item %cetsockopt() on closed socket %s -(W) You tried to get or set a socket option on a closed socket. +(W closed) You tried to get or set a socket option on a closed socket. Did you forget to check the return value of your socket() call? -See L<perlfunc/getsockopt>. +See L<perlfunc/getsockopt> and L<perlfunc/setsockopt>. =item \1 better written as $1 -(W) Outside of patterns, backreferences live on as variables. The use +(W syntax) Outside of patterns, backreferences live on as variables. The use of backslashes is grandfathered on the right-hand side of a substitution, but stylistically it's better to use the variable form because other Perl programmers will expect it, and it works better if there are more than 9 backreferences. -=item '|' and 'E<lt>' may not both be specified on command line +=item '|' and '<' may not both be specified on command line (F) An error peculiar to VMS. Perl does its own command line redirection, and found that STDIN was a pipe, and that you also tried to redirect STDIN using -'E<lt>'. Only one STDIN stream to a customer, please. +'<'. Only one STDIN stream to a customer, please. -=item '|' and 'E<gt>' may not both be specified on command line +=item '|' and '>' may not both be specified on command line (F) An error peculiar to VMS. Perl does its own command line redirection, and thinks you tried to redirect stdout both to a file and into a pipe to another @@ -3159,3 +3642,4 @@ in F<README.os2>. 
=back +=cut diff --git a/contrib/perl5/pod/perldsc.pod b/contrib/perl5/pod/perldsc.pod index ef3ae75..5ab97e1 100644 --- a/contrib/perl5/pod/perldsc.pod +++ b/contrib/perl5/pod/perldsc.pod @@ -8,8 +8,8 @@ The single feature most sorely lacking in the Perl programming language prior to its 5.0 release was complex data structures. Even without direct language support, some valiant programmers did manage to emulate them, but it was hard work and not for the faint of heart. You could occasionally -get away with the C<$m{$LoL,$b}> notation borrowed from I<awk> in which the -keys are actually more like a single concatenated string C<"$LoL$b">, but +get away with the C<$m{$AoA,$b}> notation borrowed from B<awk> in which the +keys are actually more like a single concatenated string C<"$AoA$b">, but traversal and sorting were difficult. More desperate programmers even hacked Perl's internal symbol table directly, a strategy that proved hard to develop and maintain--to put it mildly. @@ -21,7 +21,7 @@ with three dimensions! for $x (1 .. 10) { for $y (1 .. 10) { for $z (1 .. 10) { - $LoL[$x][$y][$z] = + $AoA[$x][$y][$z] = $x ** $y + $z; } } @@ -30,7 +30,7 @@ with three dimensions! Alas, however simple this may appear, underneath it's a much more elaborate construct than meets the eye! -How do you print it out? Why can't you say just C<print @LoL>? How do +How do you print it out? Why can't you say just C<print @AoA>? How do you sort it? How can you pass it to a function or get one of these back from a function? Is is an object? Can you save it to disk to read back later? How do you access whole rows or columns of that matrix? Do @@ -93,8 +93,8 @@ level. It's just that you can I<use> it as though it were a two-dimensional one. This is actually the way almost all C multidimensional arrays work as well. 
- $list[7][12] # array of arrays - $list[7]{string} # array of hashes + $array[7][12] # array of arrays + $array[7]{string} # array of hashes $hash{string}[7] # hash of arrays $hash{string}{'another string'} # hash of hashes @@ -102,10 +102,10 @@ Now, because the top level contains only references, if you try to print out your array in with a simple print() function, you'll get something that doesn't look very nice, like this: - @LoL = ( [2, 3], [4, 5, 7], [0] ); - print $LoL[1][2]; + @AoA = ( [2, 3], [4, 5, 7], [0] ); + print $AoA[1][2]; 7 - print @LoL; + print @AoA; ARRAY(0x83c38)ARRAY(0x8b194)ARRAY(0x8b1d0) @@ -124,25 +124,25 @@ repeatedly. Here's the case where you just get the count instead of a nested array: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = @list; # WRONG! + @array = somefunc($i); + $AoA[$i] = @array; # WRONG! } -That's just the simple case of assigning a list to a scalar and getting +That's just the simple case of assigning an array to a scalar and getting its element count. If that's what you really and truly want, then you might do well to consider being a tad more explicit about it, like this: for $i (1..10) { - @list = somefunc($i); - $counts[$i] = scalar @list; + @array = somefunc($i); + $counts[$i] = scalar @array; } Here's the case of taking a reference to the same memory location again and again: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = \@list; # WRONG! + @array = somefunc($i); + $AoA[$i] = \@array; # WRONG! } So, what's the big problem with that? It looks right, doesn't it? @@ -150,8 +150,8 @@ After all, I just told you that you need an array of references, so by golly, you've made me one! Unfortunately, while this is true, it's still broken. All the references -in @LoL refer to the I<very same place>, and they will therefore all hold -whatever was last in @list! It's similar to the problem demonstrated in +in @AoA refer to the I<very same place>, and they will therefore all hold +whatever was last in @array! 
It's similar to the problem demonstrated in the following C program: #include <pwd.h> @@ -176,40 +176,40 @@ hash constructor C<{}> instead. Here's the right way to do the preceding broken code fragments: for $i (1..10) { - @list = somefunc($i); - $LoL[$i] = [ @list ]; + @array = somefunc($i); + $AoA[$i] = [ @array ]; } The square brackets make a reference to a new array with a I<copy> -of what's in @list at the time of the assignment. This is what +of what's in @array at the time of the assignment. This is what you want. Note that this will produce something similar, but it's much harder to read: for $i (1..10) { - @list = 0 .. $i; - @{$LoL[$i]} = @list; + @array = 0 .. $i; + @{$AoA[$i]} = @array; } Is it the same? Well, maybe so--and maybe not. The subtle difference is that when you assign something in square brackets, you know for sure it's always a brand new reference with a new I<copy> of the data. -Something else could be going on in this new case with the C<@{$LoL[$i]}}> +Something else could be going on in this new case with the C<@{$AoA[$i]}> dereference on the left-hand-side of the assignment. It all depends on -whether C<$LoL[$i]> had been undefined to start with, or whether it -already contained a reference. If you had already populated @LoL with +whether C<$AoA[$i]> had been undefined to start with, or whether it +already contained a reference. If you had already populated @AoA with references, as in - $LoL[3] = \@another_list; + $AoA[3] = \@another_array; Then the assignment with the indirection on the left-hand-side would use the existing reference that was already there: - @{$LoL[3]} = @list; + @{$AoA[3]} = @array; Of course, this I<would> have the "interesting" effect of clobbering -@another_list.
(Have you ever noticed how when a programmer says something is "interesting", that rather than meaning "intriguing", they're disturbingly more apt to mean that it's "annoying", "difficult", or both? :-) @@ -222,8 +222,8 @@ Surprisingly, the following dangerous-looking construct will actually work out fine: for $i (1..10) { - my @list = somefunc($i); - $LoL[$i] = \@list; + my @array = somefunc($i); + $AoA[$i] = \@array; } That's because my() is more of a run-time statement than it is a @@ -242,18 +242,18 @@ do the right thing behind the scenes. In summary: - $LoL[$i] = [ @list ]; # usually best - $LoL[$i] = \@list; # perilous; just how my() was that list? - @{ $LoL[$i] } = @list; # way too tricky for most programmers + $AoA[$i] = [ @array ]; # usually best + $AoA[$i] = \@array; # perilous; just how my() was that array? + @{ $AoA[$i] } = @array; # way too tricky for most programmers =head1 CAVEAT ON PRECEDENCE -Speaking of things like C<@{$LoL[$i]}>, the following are actually the +Speaking of things like C<@{$AoA[$i]}>, the following are actually the same thing: - $listref->[2][2] # clear - $$listref[2][2] # confusing + $aref->[2][2] # clear + $$aref[2][2] # confusing That's because Perl's precedence rules on its five prefix dereferencers (which look like someone swearing: C<$ @ * % &>) make them bind more @@ -263,11 +263,11 @@ accustomed to using C<*a[i]> to mean what's pointed to by the I<i'th> element of C<a>. That is, they first take the subscript, and only then dereference the thing at that subscript. That's fine in C, but this isn't C. -The seemingly equivalent construct in Perl, C<$$listref[$i]> first does -the deref of C<$listref>, making it take $listref as a reference to an +The seemingly equivalent construct in Perl, C<$$aref[$i]> first does +the deref of $aref, making it take $aref as a reference to an array, and then dereference that, and finally tell you the I<i'th> value -of the array pointed to by $LoL. 
If you wanted the C notion, you'd have to -write C<${$LoL[$i]}> to force the C<$LoL[$i]> to get evaluated first +of the array pointed to by $AoA. If you wanted the C notion, you'd have to +write C<${$AoA[$i]}> to force the C<$AoA[$i]> to get evaluated first before the leading C<$> dereferencer. =head1 WHY YOU SHOULD ALWAYS C<use strict> @@ -283,19 +283,19 @@ This way, you'll be forced to declare all your variables with my() and also disallow accidental "symbolic dereferencing". Therefore if you'd done this: - my $listref = [ + my $aref = [ [ "fred", "barney", "pebbles", "bambam", "dino", ], [ "homer", "bart", "marge", "maggie", ], [ "george", "jane", "elroy", "judy", ], ]; - print $listref[2][2]; + print $aref[2][2]; The compiler would immediately flag that as an error I<at compile time>, -because you were accidentally accessing C<@listref>, an undeclared +because you were accidentally accessing C<@aref>, an undeclared variable, and it would thereby remind you to write instead: - print $listref->[2][2] + print $aref->[2][2] =head1 DEBUGGING @@ -303,10 +303,10 @@ Before version 5.002, the standard Perl debugger didn't do a very nice job of printing out complex data structures. With 5.002 or above, the debugger includes several new features, including command line editing as well as the C<x> command to dump out complex data structures. For -example, given the assignment to $LoL above, here's the debugger output: +example, given the assignment to $AoA above, here's the debugger output: - DB<1> x $LoL - $LoL = ARRAY(0x13b5a0) + DB<1> x $AoA + $AoA = ARRAY(0x13b5a0) 0 ARRAY(0x1f0a24) 0 'fred' 1 'barney' @@ -330,79 +330,79 @@ Presented with little comment (these will get their own manpages someday) here are short code examples illustrating access of various types of data structures. 
-=head1 LISTS OF LISTS +=head1 ARRAYS OF ARRAYS -=head2 Declaration of a LIST OF LISTS +=head2 Declaration of an ARRAY OF ARRAYS - @LoL = ( + @AoA = ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], [ "homer", "marge", "bart" ], ); -=head2 Generation of a LIST OF LISTS +=head2 Generation of an ARRAY OF ARRAYS # reading from file while ( <> ) { - push @LoL, [ split ]; + push @AoA, [ split ]; } # calling a function for $i ( 1 .. 10 ) { - $LoL[$i] = [ somefunc($i) ]; + $AoA[$i] = [ somefunc($i) ]; } # using temp vars for $i ( 1 .. 10 ) { @tmp = somefunc($i); - $LoL[$i] = [ @tmp ]; + $AoA[$i] = [ @tmp ]; } # add to an existing row - push @{ $LoL[0] }, "wilma", "betty"; + push @{ $AoA[0] }, "wilma", "betty"; -=head2 Access and Printing of a LIST OF LISTS +=head2 Access and Printing of an ARRAY OF ARRAYS # one element - $LoL[0][0] = "Fred"; + $AoA[0][0] = "Fred"; # another element - $LoL[1][1] =~ s/(\w)/\u$1/; + $AoA[1][1] =~ s/(\w)/\u$1/; # print the whole thing with refs - for $aref ( @LoL ) { + for $aref ( @AoA ) { print "\t [ @$aref ],\n"; } # print the whole thing with indices - for $i ( 0 .. $#LoL ) { - print "\t [ @{$LoL[$i]} ],\n"; + for $i ( 0 .. $#AoA ) { + print "\t [ @{$AoA[$i]} ],\n"; } # print the whole thing one at a time - for $i ( 0 .. $#LoL ) { - for $j ( 0 .. $#{ $LoL[$i] } ) { - print "elt $i $j is $LoL[$i][$j]\n"; + for $i ( 0 .. $#AoA ) { + for $j ( 0 ..
$#{ $AoA[$i] } ) { + print "elt $i $j is $AoA[$i][$j]\n"; } } -=head1 HASHES OF LISTS +=head1 HASHES OF ARRAYS -=head2 Declaration of a HASH OF LISTS +=head2 Declaration of a HASH OF ARRAYS - %HoL = ( + %HoA = ( flintstones => [ "fred", "barney" ], jetsons => [ "george", "jane", "elroy" ], simpsons => [ "homer", "marge", "bart" ], ); -=head2 Generation of a HASH OF LISTS +=head2 Generation of a HASH OF ARRAYS # reading from file # flintstones: fred barney wilma dino while ( <> ) { next unless s/^(.*?):\s*//; - $HoL{$1} = [ split ]; + $HoA{$1} = [ split ]; } # reading from file; more temps @@ -410,65 +410,65 @@ types of data structures. while ( $line = <> ) { ($who, $rest) = split /:\s*/, $line, 2; @fields = split ' ', $rest; - $HoL{$who} = [ @fields ]; + $HoA{$who} = [ @fields ]; } # calling a function that returns a list for $group ( "simpsons", "jetsons", "flintstones" ) { - $HoL{$group} = [ get_family($group) ]; + $HoA{$group} = [ get_family($group) ]; } # likewise, but using temps for $group ( "simpsons", "jetsons", "flintstones" ) { @members = get_family($group); - $HoL{$group} = [ @members ]; + $HoA{$group} = [ @members ]; } # append new members to an existing family - push @{ $HoL{"flintstones"} }, "wilma", "betty"; + push @{ $HoA{"flintstones"} }, "wilma", "betty"; -=head2 Access and Printing of a HASH OF LISTS +=head2 Access and Printing of a HASH OF ARRAYS # one element - $HoL{flintstones}[0] = "Fred"; + $HoA{flintstones}[0] = "Fred"; # another element - $HoL{simpsons}[1] =~ s/(\w)/\u$1/; + $HoA{simpsons}[1] =~ s/(\w)/\u$1/; # print the whole thing - foreach $family ( keys %HoL ) { - print "$family: @{ $HoL{$family} }\n" + foreach $family ( keys %HoA ) { + print "$family: @{ $HoA{$family} }\n" } # print the whole thing with indices - foreach $family ( keys %HoL ) { + foreach $family ( keys %HoA ) { print "family: "; - foreach $i ( 0 .. $#{ $HoL{$family} } ) { - print " $i = $HoL{$family}[$i]"; + foreach $i ( 0 .. 
$#{ $HoA{$family} } ) { + print " $i = $HoA{$family}[$i]"; } print "\n"; } # print the whole thing sorted by number of members - foreach $family ( sort { @{$HoL{$b}} <=> @{$HoL{$a}} } keys %HoL ) { - print "$family: @{ $HoL{$family} }\n" + foreach $family ( sort { @{$HoA{$b}} <=> @{$HoA{$a}} } keys %HoA ) { + print "$family: @{ $HoA{$family} }\n" } # print the whole thing sorted by number of members and name foreach $family ( sort { - @{$HoL{$b}} <=> @{$HoL{$a}} + @{$HoA{$b}} <=> @{$HoA{$a}} || $a cmp $b - } keys %HoL ) + } keys %HoA ) { - print "$family: ", join(", ", sort @{ $HoL{$family} }), "\n"; + print "$family: ", join(", ", sort @{ $HoA{$family} }), "\n"; } -=head1 LISTS OF HASHES +=head1 ARRAYS OF HASHES -=head2 Declaration of a LIST OF HASHES +=head2 Declaration of an ARRAY OF HASHES - @LoH = ( + @AoH = ( { Lead => "fred", Friend => "barney", @@ -485,7 +485,7 @@ types of data structures. } ); -=head2 Generation of a LIST OF HASHES +=head2 Generation of an ARRAY OF HASHES # reading from file # format: LEAD=fred FRIEND=barney @@ -495,7 +495,7 @@ types of data structures. ($key, $value) = split /=/, $field; $rec->{$key} = $value; } - push @LoH, $rec; + push @AoH, $rec; } @@ -503,34 +503,34 @@ types of data structures.
# format: LEAD=fred FRIEND=barney # no temp while ( <> ) { - push @LoH, { split /[\s+=]/ }; + push @AoH, { split /[\s+=]/ }; } - # calling a function that returns a key,value list, like + # calling a function that returns a key/value pair list, like # "lead","fred","daughter","pebbles" while ( %fields = getnextpairset() ) { - push @LoH, { %fields }; + push @AoH, { %fields }; } # likewise, but using no temp vars while (<>) { - push @LoH, { parsepairs($_) }; + push @AoH, { parsepairs($_) }; } # add key/value to an element - $LoH[0]{pet} = "dino"; - $LoH[2]{pet} = "santa's little helper"; + $AoH[0]{pet} = "dino"; + $AoH[2]{pet} = "santa's little helper"; -=head2 Access and Printing of a LIST OF HASHES +=head2 Access and Printing of an ARRAY OF HASHES # one element - $LoH[0]{lead} = "fred"; + $AoH[0]{lead} = "fred"; # another element - $LoH[1]{lead} =~ s/(\w)/\u$1/; + $AoH[1]{lead} =~ s/(\w)/\u$1/; # print the whole thing with refs - for $href ( @LoH ) { + for $href ( @AoH ) { print "{ "; for $role ( keys %$href ) { print "$role=$href->{$role} "; @@ -539,18 +539,18 @@ types of data structures. } # print the whole thing with indices - for $i ( 0 .. $#LoH ) { + for $i ( 0 .. $#AoH ) { print "$i is { "; - for $role ( keys %{ $LoH[$i] } ) { - print "$role=$LoH[$i]{$role} "; + for $role ( keys %{ $AoH[$i] } ) { + print "$role=$AoH[$i]{$role} "; } print "}\n"; } # print the whole thing one at a time - for $i ( 0 .. $#LoH ) { - for $role ( keys %{ $LoH[$i] } ) { - print "elt $i $role is $LoH[$i]{$role}\n"; + for $i ( 0 ..
$#AoH ) { + for $role ( keys %{ $AoH[$i] } ) { + print "elt $i $role is $AoH[$i]{$role}\n"; } } @@ -767,9 +767,9 @@ many different sorts: ########################################################### # now, you might want to make interesting extra fields that # include pointers back into the same data structure so if - # change one piece, it changes everywhere, like for examples - # if you wanted a {kids} field that was an array reference - # to a list of the kids' records without having duplicate + # change one piece, it changes everywhere, like for example + # if you wanted a {kids} field that was a reference + # to an array of the kids' records without having duplicate # records and thus update problems. ########################################################### foreach $family (keys %TV) { @@ -784,7 +784,7 @@ many different sorts: $rec->{kids} = [ @kids ]; } - # you copied the list, but the list itself contains pointers + # you copied the array, but the array itself contains pointers # to uncopied objects. this means that if you make bart get # older via diff --git a/contrib/perl5/pod/perlembed.pod b/contrib/perl5/pod/perlembed.pod index 03c5507..c4df676 100644 --- a/contrib/perl5/pod/perlembed.pod +++ b/contrib/perl5/pod/perlembed.pod @@ -12,7 +12,7 @@ Do you want to: =item B<Use C from Perl?> -Read L<perlxstut>, L<perlxs>, L<h2xs>, and L<perlguts>. +Read L<perlxstut>, L<perlxs>, L<h2xs>, L<perlguts>, and L<perlapi>. =item B<Use a Unix program from Perl?> @@ -200,9 +200,9 @@ calling I<perl_run>. =head2 Calling a Perl subroutine from your C program -To call individual Perl subroutines, you can use any of the B<perl_call_*> +To call individual Perl subroutines, you can use any of the B<call_*> functions documented in L<perlcall>. -In this example we'll use C<perl_call_argv>. +In this example we'll use C<call_argv>. That's shown below, in a program I'll call I<showtime.c>. @@ -221,7 +221,7 @@ That's shown below, in a program I'll call I<showtime.c>. 
/*** skipping perl_run() ***/ - perl_call_argv("showtime", G_DISCARD | G_NOARGS, args); + call_argv("showtime", G_DISCARD | G_NOARGS, args); perl_destruct(my_perl); perl_free(my_perl); @@ -257,7 +257,7 @@ and package C<END {}> blocks. If you want to pass arguments to the Perl subroutine, you can add strings to the C<NULL>-terminated C<args> list passed to -I<perl_call_argv>. For other data types, or to examine return values, +I<call_argv>. For other data types, or to examine return values, you'll need to manipulate the Perl stack. That's demonstrated in the last section of this document: L<Fiddling with the Perl stack from your C program>. @@ -265,7 +265,7 @@ your C program>. =head2 Evaluating a Perl statement from your C program Perl provides two API functions to evaluate pieces of Perl code. -These are L<perlguts/perl_eval_sv> and L<perlguts/perl_eval_pv>. +These are L<perlapi/eval_sv> and L<perlapi/eval_pv>. Arguably, these are the only routines you'll ever need to execute snippets of Perl code from within your C program. Your code can be as @@ -273,44 +273,44 @@ long as you wish; it can contain multiple statements; it can employ L<perlfunc/use>, L<perlfunc/require>, and L<perlfunc/do> to include external Perl files. -I<perl_eval_pv> lets us evaluate individual Perl strings, and then +I<eval_pv> lets us evaluate individual Perl strings, and then extract variables for coercion into C types. The following program, I<string.c>, executes three Perl strings, extracting an C<int> from the first, a C<float> from the second, and a C<char *> from the third. 
#include <EXTERN.h> #include <perl.h> - + static PerlInterpreter *my_perl; - + main (int argc, char **argv, char **env) { STRLEN n_a; char *embedding[] = { "", "-e", "0" }; - + my_perl = perl_alloc(); perl_construct( my_perl ); - + perl_parse(my_perl, NULL, 3, embedding, NULL); perl_run(my_perl); - + /** Treat $a as an integer **/ - perl_eval_pv("$a = 3; $a **= 2", TRUE); - printf("a = %d\n", SvIV(perl_get_sv("a", FALSE))); - + eval_pv("$a = 3; $a **= 2", TRUE); + printf("a = %d\n", SvIV(get_sv("a", FALSE))); + /** Treat $a as a float **/ - perl_eval_pv("$a = 3.14; $a **= 2", TRUE); - printf("a = %f\n", SvNV(perl_get_sv("a", FALSE))); - + eval_pv("$a = 3.14; $a **= 2", TRUE); + printf("a = %f\n", SvNV(get_sv("a", FALSE))); + /** Treat $a as a string **/ - perl_eval_pv("$a = 'rekcaH lreP rehtonA tsuJ'; $a = reverse($a);", TRUE); - printf("a = %s\n", SvPV(perl_get_sv("a", FALSE), n_a)); - + eval_pv("$a = 'rekcaH lreP rehtonA tsuJ'; $a = reverse($a);", TRUE); + printf("a = %s\n", SvPV(get_sv("a", FALSE), n_a)); + perl_destruct(my_perl); perl_free(my_perl); } -All of those strange functions with I<sv> in their names help convert Perl scalars to C types. They're described in L<perlguts>. +All of those strange functions with I<sv> in their names help convert Perl scalars to C types. They're described in L<perlguts> and L<perlapi>. If you compile and run I<string.c>, you'll see the results of using I<SvIV()> to create an C<int>, I<SvNV()> to create a C<float>, and @@ -323,11 +323,11 @@ I<SvPV()> to create a string: In the example above, we've created a global variable to temporarily store the computed value of our eval'd expression. It is also possible and in most cases a better strategy to fetch the return value -from I<perl_eval_pv()> instead. Example: +from I<eval_pv()> instead. Example: ... 
STRLEN n_a; - SV *val = perl_eval_pv("reverse 'rekcaH lreP rehtonA tsuJ'", TRUE); + SV *val = eval_pv("reverse 'rekcaH lreP rehtonA tsuJ'", TRUE); printf("%s\n", SvPV(val,n_a)); ... @@ -336,7 +336,7 @@ variables and we've simplified our code as well. =head2 Performing Perl pattern matches and substitutions from your C program -The I<perl_eval_sv()> function lets us evaluate strings of Perl code, so we can +The I<eval_sv()> function lets us evaluate strings of Perl code, so we can define some functions that use it to "specialize" in matches and substitutions: I<match()>, I<substitute()>, and I<matches()>. @@ -364,51 +364,51 @@ been wrapped here): #include <EXTERN.h> #include <perl.h> - - /** my_perl_eval_sv(code, error_check) - ** kinda like perl_eval_sv(), + + /** my_eval_sv(code, error_check) + ** kinda like eval_sv(), ** but we pop the return value off the stack **/ - SV* my_perl_eval_sv(SV *sv, I32 croak_on_error) + SV* my_eval_sv(SV *sv, I32 croak_on_error) { dSP; SV* retval; STRLEN n_a; - + PUSHMARK(SP); - perl_eval_sv(sv, G_SCALAR); - + eval_sv(sv, G_SCALAR); + SPAGAIN; retval = POPs; PUTBACK; - + if (croak_on_error && SvTRUE(ERRSV)) croak(SvPVx(ERRSV, n_a)); - + return retval; } - + /** match(string, pattern) ** ** Used for matches in a scalar context. ** ** Returns 1 if the match was successful; 0 otherwise. **/ - + I32 match(SV *string, char *pattern) { SV *command = NEWSV(1099, 0), *retval; STRLEN n_a; - + sv_setpvf(command, "my $string = '%s'; $string =~ %s", SvPV(string,n_a), pattern); - - retval = my_perl_eval_sv(command, TRUE); + + retval = my_eval_sv(command, TRUE); SvREFCNT_dec(command); - + return SvIV(retval); } - + /** substitute(string, pattern) ** ** Used for =~ operations that modify their left-hand side (s/// and tr///) @@ -416,22 +416,22 @@ been wrapped here): ** Returns the number of successful matches, and ** modifies the input string if there were any. 
**/ - + I32 substitute(SV **string, char *pattern) { SV *command = NEWSV(1099, 0), *retval; STRLEN n_a; - + sv_setpvf(command, "$string = '%s'; ($string =~ %s)", SvPV(*string,n_a), pattern); - - retval = my_perl_eval_sv(command, TRUE); + + retval = my_eval_sv(command, TRUE); SvREFCNT_dec(command); - - *string = perl_get_sv("string", FALSE); + + *string = get_sv("string", FALSE); return SvIV(retval); } - + /** matches(string, pattern, matches) ** ** Used for matches in an array context. @@ -439,25 +439,25 @@ been wrapped here): ** Returns the number of matches, ** and fills in **matches with the matching substrings **/ - + I32 matches(SV *string, char *pattern, AV **match_list) { SV *command = NEWSV(1099, 0); I32 num_matches; STRLEN n_a; - + sv_setpvf(command, "my $string = '%s'; @array = ($string =~ %s)", SvPV(string,n_a), pattern); - - my_perl_eval_sv(command, TRUE); + + my_eval_sv(command, TRUE); SvREFCNT_dec(command); - - *match_list = perl_get_av("array", FALSE); + + *match_list = get_av("array", FALSE); num_matches = av_len(*match_list) + 1; /** assume $[ is 0 **/ - + return num_matches; } - + main (int argc, char **argv, char **env) { PerlInterpreter *my_perl = perl_alloc(); @@ -466,30 +466,30 @@ been wrapped here): I32 num_matches, i; SV *text = NEWSV(1099,0); STRLEN n_a; - + perl_construct(my_perl); perl_parse(my_perl, NULL, 3, embedding, NULL); - + sv_setpv(text, "When he is at a convenience store and the bill comes to some amount like 76 cents, Maynard is aware that there is something he *should* do, something that will enable him to get back a quarter, but he has no idea *what*. He fumbles through his red squeezey changepurse and gives the boy three extra pennies with his dollar, hoping that he might luck into the correct amount. The boy gives him back two of his own pennies and then the big shiny quarter that is his prize. -RICHH"); - + if (match(text, "m/quarter/")) /** Does text contain 'quarter'? 
**/ printf("match: Text contains the word 'quarter'.\n\n"); else printf("match: Text doesn't contain the word 'quarter'.\n\n"); - + if (match(text, "m/eighth/")) /** Does text contain 'eighth'? **/ printf("match: Text contains the word 'eighth'.\n\n"); else printf("match: Text doesn't contain the word 'eighth'.\n\n"); - + /** Match all occurrences of /wi../ **/ num_matches = matches(text, "m/(wi..)/g", &match_list); printf("matches: m/(wi..)/g found %d matches...\n", num_matches); - + for (i = 0; i < num_matches; i++) printf("match: %s\n", SvPV(*av_fetch(match_list, i, FALSE),n_a)); printf("\n"); - + /** Remove all vowels from text **/ num_matches = substitute(&text, "s/[aeiou]//gi"); if (num_matches) { @@ -497,12 +497,12 @@ been wrapped here): num_matches); printf("Now text is: %s\n\n", SvPV(text,n_a)); } - + /** Attempt a substitution **/ if (!substitute(&text, "s/Perl/C/")) { printf("substitute: s/Perl/C...No substitution made.\n\n"); } - + SvREFCNT_dec(text); PL_perl_destruct_level = 1; perl_destruct(my_perl); @@ -539,7 +539,7 @@ results--the return value of your Perl subroutine--off the stack. First you'll need to know how to convert between C types and Perl types, with newSViv() and sv_setnv() and newAV() and all their -friends. They're described in L<perlguts>. +friends. They're described in L<perlguts> and L<perlapi>. Then you'll need to know how to manipulate the Perl stack. That's described in L<perlcall>. @@ -576,7 +576,7 @@ deep breath... 
XPUSHs(sv_2mortal(newSViv(a))); /* push the base onto the stack */ XPUSHs(sv_2mortal(newSViv(b))); /* push the exponent onto stack */ PUTBACK; /* make local stack pointer global */ - perl_call_pv("expo", G_SCALAR); /* call the function */ + call_pv("expo", G_SCALAR); /* call the function */ SPAGAIN; /* refresh stack pointer */ /* pop the return value from stack */ printf ("%d to the %dth power is %d.\n", a, b, POPi); @@ -638,7 +638,7 @@ the filename into a guaranteed-unique package name, and then compile the code into that package using L<perlfunc/eval>. In the example below, each file will only be compiled once. Or, the application might choose to clean out the symbol table associated with the file -after it's no longer needed. Using L<perlcall/perl_call_argv>, We'll +after it's no longer needed. Using L<perlapi/call_argv>, We'll call the subroutine C<Embed::Persistent::eval_file> which lives in the file C<persistent.pl> and pass the filename and boolean cleanup/cache flag as arguments. @@ -656,7 +656,7 @@ with L<perlfunc/my> whenever possible. #persistent.pl use strict; - use vars '%Cache'; + our %Cache; use Symbol qw(delete_package); sub valid_package_name { @@ -750,7 +750,7 @@ with L<perlfunc/my> whenever possible. /* call the subroutine, passing it the filename as an argument */ args[0] = filename; - perl_call_argv("Embed::Persistent::eval_file", + call_argv("Embed::Persistent::eval_file", G_DISCARD | G_EVAL, args); /* check $@ */ @@ -901,10 +901,10 @@ to see how Perl does this: # define EXTERN_C extern #endif - static void xs_init _((void)); + static void xs_init (void); - EXTERN_C void boot_DynaLoader _((CV* cv)); - EXTERN_C void boot_Socket _((CV* cv)); + EXTERN_C void boot_DynaLoader (CV* cv); + EXTERN_C void boot_Socket (CV* cv); EXTERN_C void @@ -953,7 +953,7 @@ B<ExtUtils::Embed> can also automate writing the I<xs_init> glue code. 
% cc -c interp.c `perl -MExtUtils::Embed -e ccopts` % cc -o interp perlxsi.o interp.o `perl -MExtUtils::Embed -e ldopts` -Consult L<perlxs> and L<perlguts> for more details. +Consult L<perlxs>, L<perlguts>, and L<perlapi> for more details. =head1 Embedding Perl under Win32 diff --git a/contrib/perl5/pod/perlfaq.pod b/contrib/perl5/pod/perlfaq.pod index cb35493..fa6943f 100644 --- a/contrib/perl5/pod/perlfaq.pod +++ b/contrib/perl5/pod/perlfaq.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq - frequently asked questions about Perl ($Date: 1999/01/08 05:54:52 $) +perlfaq - frequently asked questions about Perl ($Date: 1999/05/23 20:38:02 $) =head1 DESCRIPTION @@ -199,6 +199,8 @@ miscellaneous data issues. =item * How do I find the week-of-the-year/day-of-the-year? +=item * How do I find the current century or millennium? + =item * How can I compare two dates and find the difference? =item * How can I take a string and turn it into epoch seconds? @@ -248,13 +250,13 @@ miscellaneous data issues. =item * What's wrong with always quoting "$vars"? -=item * Why don't my E<lt>E<lt>HERE documents work? +=item * Why don't my <<HERE documents work? =item * What is the difference between a list and an array? =item * What is the difference between $array[1] and @array[1]? -=item * How can I extract just the unique elements of an array? +=item * How can I remove duplicate elements from a list or array? =item * How can I tell whether a list or array contains a certain element? @@ -359,11 +361,11 @@ I/O and the "f" issues: filehandles, flushing, formats and footers. =item * How come when I open a file read-write it wipes it out? -=item * Why do I sometimes get an "Argument list too long" when I use E<lt>*E<gt>? +=item * Why do I sometimes get an "Argument list too long" when I use <*>? =item * Is there a leak/bug in glob()? -=item * How can I open a file with a leading "E<gt>" or trailing blanks? +=item * How can I open a file with a leading ">" or trailing blanks? 
=item * How can I reliably rename a file? @@ -381,6 +383,8 @@ I/O and the "f" issues: filehandles, flushing, formats and footers. =item * How do I print to more than one file at once? +=item * How can I read in an entire file all at once? + =item * How can I read in a file by paragraphs? =item * How can I read a single character from a file? From the keyboard? @@ -426,7 +430,7 @@ Pattern matching and regular expressions. =item * How can I match a locale-smart version of C</[a-zA-Z]/>? -=item * How can I quote a variable to use in a regexp? +=item * How can I quote a variable to use in a regex? =item * What is C</o> really for? @@ -434,7 +438,7 @@ Pattern matching and regular expressions. =item * Can I use Perl regular expressions to match balanced text? -=item * What does it mean that regexps are greedy? How can I get around it? +=item * What does it mean that regexes are greedy? How can I get around it? =item * How do I process each word on each line? @@ -450,7 +454,7 @@ Pattern matching and regular expressions. =item * What good is C<\G> in a regular expression? -=item * Are Perl regexps DFAs or NFAs? Are they POSIX compliant? +=item * Are Perl regexes DFAs or NFAs? Are they POSIX compliant? =item * What's wrong with using grep or map in a void context? @@ -470,7 +474,7 @@ other sections. =item * Can I get a BNF/yacc/RE for the Perl language? -=item * What are all these $@%* punctuation signs, and how do I know when to use them? +=item * What are all these $@%&* punctuation signs, and how do I know when to use them? =item * Do I always/never have to quote my strings or use semicolons and commas? @@ -494,7 +498,7 @@ other sections. =item * What is variable suicide and how can I prevent it? -=item * How can I pass/return a {Function, FileHandle, Array, Hash, Method, Regexp}? +=item * How can I pass/return a {Function, FileHandle, Array, Hash, Method, Regex}? =item * How do I create a static variable? @@ -504,7 +508,7 @@ other sections. 
=item * What's the difference between deep and shallow binding? -=item * Why doesn't "my($foo) = E<lt>FILEE<gt>;" work right? +=item * Why doesn't "my($foo) = <FILE>;" work right? =item * How do I redefine a builtin function, operator, or method? @@ -522,6 +526,8 @@ other sections. =item * How do I clear a package? +=item * How can I use a variable as a variable name? + =back @@ -620,7 +626,7 @@ Interprocess communication (IPC), control over the user-interface =item * How do I open a file without blocking? -=item * How do I install a CPAN module? +=item * How do I install a module from CPAN? =item * What's the difference between require and use? @@ -758,6 +764,15 @@ in respect of this information or its use. =over 4 +=item 23/May/99 + +Extensive updates from the net in preparation for 5.6 release. + +=item 13/April/99 + +More minor touch-ups. Added new question at the end +of perlfaq7 on variable names within variables. + =item 7/January/99 Small touchups here and there. Added all questions in this @@ -816,4 +831,3 @@ This is the initial release of version 3 of the FAQ; consequently there have been no changes since its initial release. =back - diff --git a/contrib/perl5/pod/perlfaq1.pod b/contrib/perl5/pod/perlfaq1.pod index d4cac42..af4d7cb 100644 --- a/contrib/perl5/pod/perlfaq1.pod +++ b/contrib/perl5/pod/perlfaq1.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq1 - General Questions About Perl ($Revision: 1.20 $, $Date: 1999/01/08 04:22:09 $) +perlfaq1 - General Questions About Perl ($Revision: 1.23 $, $Date: 1999/05/23 16:08:30 $) =head1 DESCRIPTION @@ -36,8 +36,8 @@ In particular, the core development team (known as the Perl Porters) are a rag-tag band of highly altruistic individuals committed to producing better software for free than you could hope to purchase for money. 
You may snoop on pending developments via -nntp://news.perl.com/perl.porters-gw/ and the Deja News archive at -http://www.dejanews.com/ using the perl.porters-gw newsgroup, or you can +news://news.perl.com/perl.porters-gw/ and the Deja archive at +http://www.deja.com/ using the perl.porters-gw newsgroup, or you can subscribe to the mailing list by sending perl5-porters-request@perl.org a subscription request. @@ -56,13 +56,13 @@ You should definitely use version 5. Version 4 is old, limited, and no longer maintained; its last patch (4.036) was in 1992, long ago and far away. Sure, it's stable, but so is anything that's dead; in fact, perl4 had been called a dead, flea-bitten camel carcass. The most recent -production release is 5.005_02 (although 5.004_04 is still supported). -The most cutting-edge development release is 5.005_54. Further references +production release is 5.005_03 (although 5.004_05 is still supported). +The most cutting-edge development release is 5.005_57. Further references to the Perl language in this document refer to the production release -unless otherwise specified. There may be one or more official bug -fixes for 5.005_02 by the time you read this, and also perhaps some -experimental versions on the way to the next release. All releases -prior to 5.004 were subject to buffer overruns, a grave security issue. +unless otherwise specified. There may be one or more official bug fixes +by the time you read this, and also perhaps some experimental versions +on the way to the next release. All releases prior to 5.004 were subject +to buffer overruns, a grave security issue. =head2 What are perl4 and perl5? @@ -96,7 +96,7 @@ found in release 5. Written in nominally portable C++, Topaz hopes to maintain 100% source-compatibility with previous releases of Perl but to run significantly faster and smaller. 
The Topaz team hopes to provide an XS compatibility interface to allow most XS modules to work unchanged, -albeit perhaps without the efficiency that the new interface uowld allow. +albeit perhaps without the efficiency that the new interface would allow. New features in Topaz are as yet undetermined, and will be addressed once compatibility and performance goals are met. @@ -125,8 +125,8 @@ and the rare new keyword). No, Perl is easy to start learning -- and easy to keep learning. It looks like most programming languages you're likely to have experience -with, so if you've ever written an C program, an awk script, a shell -script, or even BASIC program, you're already part way there. +with, so if you've ever written a C program, an awk script, a shell +script, or even a BASIC program, you're already part way there. Most tasks only require a small subset of the Perl language. One of the guiding mottos for Perl development is "there's more than one way @@ -213,8 +213,8 @@ signify the language proper and "perl" the implementation of it, i.e. the current interpreter. Hence Tom's quip that "Nothing but perl can parse Perl." You may or may not choose to follow this usage. For example, parallelism means "awk and perl" and "Python and Perl" look -ok, while "awk and Perl" and "Python and perl" do not. But never -write "PERL", because perl isn't really an acronym, aprocryphal +OK, while "awk and Perl" and "Python and perl" do not. But never +write "PERL", because perl isn't really an acronym, apocryphal folklore and post-facto expansions notwithstanding. =head2 Is it a Perl program or a Perl script? @@ -223,7 +223,7 @@ Larry doesn't really care. He says (half in jest) that "a script is what you give the actors. A program is what you give the audience." Originally, a script was a canned sequence of normally interactive -commands, that is, a chat script. Something like a uucp or ppp chat +commands, that is, a chat script. 
Something like a UUCP or PPP chat script or an expect script fits the bill nicely, as do configuration scripts run by a program at its start up, such F<.cshrc> or F<.ircrc>, for example. Chat scripts were just drivers for existing programs, @@ -247,7 +247,7 @@ a definitive answer here. Now that "script" and "scripting" are terms that have been seized by unscrupulous or unknowing marketeers for their own nefarious purposes, they have begun to take on strange and often pejorative meanings, -like "non serious" or "not real programming". Consequently, some perl +like "non serious" or "not real programming". Consequently, some Perl programmers prefer to avoid them altogether. =head2 What is a JAPH? @@ -269,7 +269,7 @@ Newer examples can be found by perusing Larry's postings: =head2 How can I convince my sysadmin/supervisor/employees to use version (5/5.005/Perl instead of some other language)? If your manager or employees are wary of unsupported software, or -software which doesn't officially ship with your Operating System, you +software which doesn't officially ship with your operating system, you might try to appeal to their self-interest. If programmers can be more productive using and utilizing Perl constructs, functionality, simplicity, and power, then the typical manager/supervisor/employee @@ -295,31 +295,30 @@ by the Perl Development Team. Another big sell for Perl5 is the large number of modules and extensions which greatly reduce development time for any given task. Also mention that the difference between version 4 and version 5 of Perl is like the difference between awk and C++. -(Well, ok, maybe not quite that distinct, but you get the idea.) If you +(Well, OK, maybe not quite that distinct, but you get the idea.) If you want support and a reasonable guarantee that what you're developing will continue to work in the future, then you have to run the supported version. That probably means running the 5.005 release, although 5.004 isn't that bad. 
Several important bugs were fixed from the 5.000 through 5.003 versions, though, so try upgrading past them if possible. -Of particular note is the massive bughunt for buffer overflow +Of particular note is the massive bug hunt for buffer overflow problems that went into the 5.004 release. All releases prior to that, including perl4, are considered insecure and should be upgraded as soon as possible. =head1 AUTHOR AND COPYRIGHT -Copyright (c) 1997-1999 Tom Christiansen and Nathan Torkington. +Copyright (c) 1997, 1998, 1999 Tom Christiansen and Nathan Torkington. All rights reserved. When included as an integrated part of the Standard Distribution -of Perl or of its documentation (printed or otherwise), this work is +of Perl or of its documentation (printed or otherwise), this work is covered under Perl's Artistic Licence. For separate distributions of all or part of this FAQ outside of that, see L<perlfaq>. -Irrespective of its distribution, all code examples here are public +Irrespective of its distribution, all code examples here are in the public domain. You are permitted and encouraged to use this code and any derivatives thereof in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit to the FAQ would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq2.pod b/contrib/perl5/pod/perlfaq2.pod index 32970af..af9178d 100644 --- a/contrib/perl5/pod/perlfaq2.pod +++ b/contrib/perl5/pod/perlfaq2.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.30 $, $Date: 1998/12/29 19:43:32 $) +perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.32 $, $Date: 1999/10/14 18:46:09 $) =head1 DESCRIPTION @@ -17,7 +17,7 @@ in standard Internet format (a gzipped archive in POSIX tar format). Perl builds and runs on a bewildering number of platforms. 
Virtually all known and current Unix derivatives are supported (Perl's native -platform), as are proprietary systems like VMS, DOS, OS/2, Windows, +platform), as are other systems like VMS, DOS, OS/2, Windows, QNX, BeOS, and the Amiga. There are also the beginnings of support for MPE/iX. @@ -42,12 +42,15 @@ get free compilers for, not for Unix systems. Some URLs that might help you are: http://language.perl.com/info/software.html - http://www.perl.com/latest/ + http://www.perl.com/pub/language/info/software.html#binary http://www.perl.com/CPAN/ports/ -If you want information on proprietary systems. A simple installation -guide for MS-DOS is available at http://www.cs.ruu.nl/~piet/perl5dos.html -and similarly for Windows 3.1 at http://www.cs.ruu.nl/~piet/perlwin3.html . +Someone looking for a Perl for Win16 might look to Laszlo Molnar's djgpp +port in http://www.perl.com/CPAN/ports/msdos/ , which comes with clear +installation instructions. A simple installation guide for MS-DOS using +Ilya Zakharevich's OS/2 port is available at +http://www.cs.ruu.nl/%7Epiet/perl5dos.html +and similarly for Windows 3.1 at http://www.cs.ruu.nl/%7Epiet/perlwin3.html . =head2 I don't have a C compiler on my system. How can I compile perl? @@ -72,7 +75,7 @@ the hard-coded @INC which perl is looking for. If this command lists any paths which don't exist on your system, then you may need to move the appropriate libraries to these locations, or create -symlinks, aliases, or shortcuts appropriately. @INC is also printed as +symbolic links, aliases, or shortcuts appropriately. @INC is also printed as part of the output of % perl -V @@ -109,7 +112,7 @@ ftp://ftp.funet.fi/pub/languages/perl/CPAN/misc/japh . Considering that there are hundreds of existing modules in the archive, one probably exists to do nearly anything you can think of. 
-Current categories under CPAN/modules/by-category/ include perl core +Current categories under CPAN/modules/by-category/ include Perl core modules; development support; operating system interfaces; networking, devices, and interprocess communication; data type utilities; database interfaces; user interfaces; interfaces to other languages; filenames, @@ -125,13 +128,13 @@ Certainly not. Larry expects that he'll be certified before Perl is. =head2 Where can I get information on Perl? -The complete Perl documentation is available with the perl distribution. -If you have perl installed locally, you probably have the documentation +The complete Perl documentation is available with the Perl distribution. +If you have Perl installed locally, you probably have the documentation installed as well: type C<man perl> if you're on a system resembling Unix. This will lead you to other important man pages, including how to set your $MANPATH. If you're not on a Unix system, access to the documentation will be different; for example, it might be only in HTML format. But all -proper perl installations have fully-accessible documentation. +proper Perl installations have fully-accessible documentation. You might also try C<perldoc perl> in case your system doesn't have a proper man command, or it's been misinstalled. If that doesn't @@ -155,7 +158,7 @@ assistance: http://language.perl.com/info/documentation.html http://reference.perl.com/query.cgi?tutorials -=head2 What are the Perl newsgroups on USENET? Where do I post questions? +=head2 What are the Perl newsgroups on Usenet? Where do I post questions? The now defunct comp.lang.perl newsgroup has been superseded by the following groups: @@ -168,7 +171,7 @@ following groups: comp.infosystems.www.authoring.cgi Writing CGI scripts for the Web. 
-There is also USENET gateway to the mailing list used by the crack +There is also a Usenet gateway to the mailing list used by the crack Perl development team (perl5-porters) at news://news.perl.com/perl.porters-gw/ . @@ -180,14 +183,15 @@ to alt.sources, please make sure it follows their posting standards, including setting the Followup-To header line to NOT include alt.sources; see their FAQ (http://www.faqs.org/faqs/alt-sources-intro/) for details. -If you're just looking for software, first use Alta Vista, Deja News, and +If you're just looking for software, first use AltaVista +(http://www.altavista.com), Deja (http://www.deja.com), and search CPAN. This is faster and more productive than just posting a request. =head2 Perl Books A number of books on Perl and/or CGI programming are available. A few of -these are good, some are ok, but many aren't worth your money. Tom +these are good, some are OK, but many aren't worth your money. Tom Christiansen maintains a list of these books, some with extensive reviews, at http://www.perl.com/perl/critiques/index.html. @@ -252,10 +256,13 @@ a star may be ordered from O'Reilly. by Larry Wall, Tom Christiansen, and Randal L. Schwartz *Perl 5 Desktop Reference - By Johan Vromans + by Johan Vromans + + *Perl in a Nutshell + by Ellen Siever, Stephan Spainhour, and Nathan Patwardhan =item Tutorials - + *Learning Perl [2nd edition] by Randal L. Schwartz and Tom Christiansen with foreword by Larry Wall @@ -296,6 +303,9 @@ a star may be ordered from O'Reilly. How to Set up and Maintain a World Wide Web Site [2nd edition] by Lincoln Stein + *Learning Perl/Tk + by Nancy Walsh + =back =head2 Perl in Magazines @@ -320,38 +330,34 @@ http://www.stonehenge.com/merlyn/WebTechniques/. To get the best (and possibly cheapest) performance, pick a site from the list below and use it to grab the complete list of mirror sites. ->From there you can find the quickest site for you. Remember, the +From there you can find the quickest site for you. 
Remember, the following list is I<not> the complete list of CPAN mirrors. http://www.perl.com/CPAN-local http://www.perl.com/CPAN (redirects to an ftp mirror) - http://www.perl.org/CPAN + ftp://cpan.valueclick.com/pub/CPAN/ ftp://ftp.funet.fi/pub/languages/perl/CPAN/ http://www.cs.ruu.nl/pub/PERL/CPAN/ ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ -=head2 What mailing lists are there for perl? +=head2 What mailing lists are there for Perl? -Most of the major modules (tk, CGI, libwww-perl) have their own +Most of the major modules (Tk, CGI, libwww-perl) have their own mailing lists. Consult the documentation that came with the module for -subscription information. The Perl Institute attempts to maintain a +subscription information. The Perl Mongers attempt to maintain a list of mailing lists at: - http://www.perl.org/maillist.html + http://www.perl.org/support/online_support.html#mail =head2 Archives of comp.lang.perl.misc -Have you tried Deja News or Alta Vista? Those are the +Have you tried Deja or AltaVista? Those are the best archives. Just look up "*perl*" as a newsgroup. - http://www.dejanews.com/dnquery.xp?QRY=&DBS=2&ST=PS&defaultOp=AND&LNG=ALL&format=terse&showsort=date&maxhits=25&subjects=&groups=*perl*&authors=&fromdate=&todate= + http://www.deja.com/dnquery.xp?QRY=&DBS=2&ST=PS&defaultOp=AND&LNG=ALL&format=terse&showsort=date&maxhits=25&subjects=&groups=*perl*&authors=&fromdate=&todate= You'll probably want to trim that down a bit, though. -ftp.cis.ufl.edu:/pub/perl/comp.lang.perl.*/monthly has an almost -complete collection dating back to 12/89 (missing 08/91 through -12/93). They are kept as one large file for each month. - You'll probably want more a sophisticated query and retrieval mechanism than a file listing, preferably one that allows you to retrieve articles using a fast-access indices, keyed on at least author, date, @@ -364,7 +370,7 @@ let perlfaq-suggestions@perl.com know. =head2 Where can I buy a commercial version of Perl? 
-In a real sense, Perl already I<is> commercial software: It has a licence +In a real sense, Perl already I<is> commercial software: It has a license that you can grab and carefully read to your manager. It is distributed in releases and comes in well-defined packages. There is a very large user community and an extensive literature. The comp.lang.perl.* @@ -377,45 +383,42 @@ better for everyone. However, these answers may not suffice for managers who require a purchase order from a company whom they can sue should anything go awry. Or maybe they need very serious hand-holding and contractual obligations. -Shrink-wrapped CDs with perl on them are available from several sources if -that will help. For example, many perl books carry a perl distribution -on them, as do the O'Reily Perl Resource Kits (in both the Unix flavor +Shrink-wrapped CDs with Perl on them are available from several sources if +that will help. For example, many Perl books carry a Perl distribution +on them, as do the O'Reilly Perl Resource Kits (in both the Unix flavor and in the proprietary Microsoft flavor); the free Unix distributions also all come with Perl. -Or you can purchase a real support contract. Although Cygnus historically -provided this service, they no longer sell support contracts for Perl. -Instead, the Paul Ingram Group will be taking up the slack through The -Perl Clinic. The following is a commercial from them: - -"Do you need professional support for Perl and/or Oraperl? Do you need -a support contract with defined levels of service? Do you want to pay -only for what you need? - -"The Paul Ingram Group has provided quality software development and -support services to some of the world's largest corporations for ten -years. We are now offering the same quality support services for Perl -at The Perl Clinic. 
This service is led by Tim Bunce, an active perl -porter since 1994 and well known as the author and maintainer of the -DBI, DBD::Oracle, and Oraperl modules and author/co-maintainer of The -Perl 5 Module List. We also offer Oracle users support for Perl5 -Oraperl and related modules (which Oracle is planning to ship as part -of Oracle Web Server 3). 20% of the profit from our Perl support work -will be donated to The Perl Institute." - -For more information, contact The Perl Clinic: - - Tel: +44 1483 424424 - Fax: +44 1483 419419 - Web: http://www.perl.co.uk/ - Email: perl-support-info@perl.co.uk or Tim.Bunce@ig.co.uk +Or you can purchase commercial incident-based support through the Perl +Clinic. The following is a commercial from them: + +"The Perl Clinic is a commercial Perl support service operated by +ActiveState Tool Corp. and The Ingram Group. The operators have many +years of in-depth experience with Perl applications and Perl internals +on a wide range of platforms. + +"Through our group of highly experienced and well-trained support engineers, +we will put our best effort into understanding your problem, providing an +explanation of the situation, and a recommendation on how to proceed." + +Contact The Perl Clinic at: + + www.PerlClinic.com + + North America Pacific Standard Time (GMT-8) + Tel: 1 604 606-4611 hours 8am-6pm + Fax: 1 604 606-4640 + + Europe (GMT) + Tel: 00 44 1483 862814 + Fax: 00 44 1483 862801 See also www.perl.com for updates on tutorials, training, and support. =head2 Where do I send bug reports? If you are reporting a bug in the perl interpreter or the modules -shipped with perl, use the I<perlbug> program in the perl distribution or +shipped with Perl, use the I<perlbug> program in the Perl distribution or mail your report to perlbug@perl.com . If you are posting a bug with a non-standard port (see the answer to @@ -426,13 +429,13 @@ bugs. Read the perlbug(1) man page (perl5.004 or later) for more information. 
-=head2 What is perl.com? +=head2 What is perl.com? Perl Mongers? pm.org? perl.org? The perl.com domain is owned by Tom Christiansen, who created it as a public service long before perl.org came about. Despite the name, it's a pretty non-commercial site meant to be a clearinghouse for information about all things Perlian, accepting no paid advertisements, bouncy -happy gifs, or silly java applets on its pages. The Perl Home Page at +happy GIFs, or silly Java applets on its pages. The Perl Home Page at http://www.perl.com/ is currently hosted on a T3 line courtesy of Songline Systems, a software-oriented subsidiary of O'Reilly and Associates. Other starting points include @@ -441,19 +444,36 @@ Other starting points include http://conference.perl.com/ http://reference.perl.com/ +Perl Mongers is an advocacy organization for the Perl language. For +details, see the Perl Mongers web site at http://www.perlmongers.org/. + +Perl Mongers uses the pm.org domain for services related to Perl user +groups. See the Perl user group web site at http://www.pm.org/ for more +information about joining, starting, or requesting services for a Perl +user group. + +Perl Mongers also maintains the perl.org domain to provide general +support services to the Perl community, including the hosting of mailing +lists, web sites, and other services. The web site +http://www.perl.org/ is a general advocacy site for the Perl language, +and there are many other sub-domains for special topics, such as + + http://history.perl.org/ + http://bugs.perl.org/ + http://www.news.perl.org/ + =head1 AUTHOR AND COPYRIGHT Copyright (c) 1997-1999 Tom Christiansen and Nathan Torkington. All rights reserved. When included as an integrated part of the Standard Distribution -of Perl or of its documentation (printed or otherwise), this work is -covered under Perl's Artistic Licence. 
For separate distributions of +of Perl or of its documentation (printed or otherwise), this work is +covered under Perl's Artistic License. For separate distributions of all or part of this FAQ outside of that, see L<perlfaq>. -Irrespective of its distribution, all code examples here are public +Irrespective of its distribution, all code examples here are in the public domain. You are permitted and encouraged to use this code and any derivatives thereof in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit to the FAQ would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq3.pod b/contrib/perl5/pod/perlfaq3.pod index a811c3c..b05b736 100644 --- a/contrib/perl5/pod/perlfaq3.pod +++ b/contrib/perl5/pod/perlfaq3.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq3 - Programming Tools ($Revision: 1.33 $, $Date: 1998/12/29 20:12:12 $) +perlfaq3 - Programming Tools ($Revision: 1.38 $, $Date: 1999/05/23 16:08:30 $) =head1 DESCRIPTION @@ -19,13 +19,13 @@ Have you read the appropriate man pages? Here's a brief index: Objects perlref, perlmod, perlobj, perltie Data Structures perlref, perllol, perldsc Modules perlmod, perlmodlib, perlsub - Regexps perlre, perlfunc, perlop, perllocale + Regexes perlre, perlfunc, perlop, perllocale Moving to perl5 perltrap, perl Linking w/C perlxstut, perlxs, perlcall, perlguts, perlembed Various http://www.perl.com/CPAN/doc/FMTEYEWTK/index.html (not a man-page but still useful) -L<perltoc> provides a crude table of contents for the perl man page set. +A crude table of contents for the Perl man page set is found in L<perltoc>. =head2 How can I use Perl interactively? @@ -41,19 +41,20 @@ operations typically found in symbolic debuggers. =head2 Is there a Perl shell? -In general, no. The Shell.pm module (distributed with perl) makes -perl try commands which aren't part of the Perl language as shell +In general, no. 
The Shell.pm module (distributed with Perl) makes +Perl try commands which aren't part of the Perl language as shell commands. perlsh from the source distribution is simplistic and uninteresting, but may still be what you want. =head2 How do I debug my Perl programs? -Have you used C<-w>? It enables warnings for dubious practices. +Have you tried C<use warnings> or used C<-w>? They enable warnings +for dubious practices. Have you tried C<use strict>? It prevents you from using symbolic references, makes you predeclare any subroutines that you call as bare words, and (probably most importantly) forces you to predeclare your -variables with C<my> or C<use vars>. +variables with C<my> or C<our> or C<use vars>. Did you check the returns of each and every system call? The operating system (and thus Perl) tells you whether they worked or not, and if not @@ -130,7 +131,7 @@ can provide significant assistance. Tom swears by the following settings in vi and its clones: set ai sw=4 - map ^O {^M}^[O^T + map! ^O {^M}^[O^T Now put that in your F<.exrc> file (replacing the caret characters with control characters) and away you go. In insert mode, ^T is @@ -144,34 +145,45 @@ to a laser printer, you can take a stab at this using http://www.perl.com/CPAN/doc/misc/tips/working.vgrind.entry, but the results are not particularly satisfying for sophisticated code. -The a2ps at http://www.infres.enst.fr/~demaille/a2ps/ does lots of things +The a2ps at http://www.infres.enst.fr/%7Edemaille/a2ps/ does lots of things related to generating nicely printed output of documents. -=head2 Is there a etags/ctags for perl? +=head2 Is there a ctags for Perl? -With respect to the source code for the Perl interpreter, yes. -There has been support for etags in the source for a long time. -Ctags was introduced in v5.005_54 (and probably 5.005_03). -After building perl, type 'make etags' or 'make ctags' and both -sets of tag files will be built. 
- -Now, if you're looking to build a tag file for perl code, then there's -a simple one at +There's a simple one at http://www.perl.com/CPAN/authors/id/TOMC/scripts/ptags.gz which may do the trick. And if not, it's easy to hack into what you want. =head2 Is there an IDE or Windows Perl Editor? -If you're on Unix, you already have an IDE -- Unix itself. -You just have to learn the toolbox. If you're not, then you -probably don't have a toolbox, so may need something else. - -PerlBuilder (XXX URL to follow) is an integrated development -environment for Windows that supports Perl development. Perl programs -are just plain text, though, so you could download emacs for Windows -(XXX) or vim for win32 (http://www.cs.vu.nl/~tmgil/vi.html). If -you're transferring Windows files to Unix, be sure to transfer in -ASCII mode so the ends of lines are appropriately converted. +If you're on Unix, you already have an IDE -- Unix itself. This powerful +IDE derives from its interoperability, flexibility, and configurability. +If you really want to get a feel for Unix-qua-IDE, the best thing to do +is to find some high-powered programmer whose native language is Unix. +Find someone who has been at this for many years, and just sit back +and watch them at work. They have created their own IDE, one that +suits their own tastes and aptitudes. Quietly observe them edit files, +move them around, compile them, debug them, test them, etc. The entire +development *is* integrated, like a top-of-the-line German sports car: +functional, powerful, and elegant. You will be absolutely astonished +at the speed and ease exhibited by the native speaker of Unix in his +home territory. The art and skill of a virtuoso can only be seen to be +believed. 
That is the path to mastery -- all these cobbled little IDEs +are expensive toys designed to sell a flashy demo using cheap tricks, +and being optimized for immediate but shallow understanding rather than +enduring use, are but a dim palimpsest of real tools. + +In short, you just have to learn the toolbox. However, if you're not +on Unix, then your vendor probably didn't bother to provide you with +a proper toolbox on the so-called complete system that you forked out +your hard-earned cash on. + +PerlBuilder (XXX URL to follow) is an integrated development environment +for Windows that supports Perl development. Perl programs are just plain +text, though, so you could download emacs for Windows (???) or a vi clone +(vim) which runs on win32 (http://www.cs.vu.nl/%7Etmgil/vi.html). +If you're transferring Windows files to Unix, be sure to transfer in +ASCII mode so the ends of lines are appropriately mangled. =head2 Where can I get Perl macros for vi? @@ -184,15 +196,15 @@ with an embedded Perl interpreter -- see http://www.perl.com/CPAN/src/misc. =head2 Where can I get perl-mode for emacs? Since Emacs version 19 patchlevel 22 or so, there have been both a -perl-mode.el and support for the perl debugger built in. These should +perl-mode.el and support for the Perl debugger built in. These should come with the standard Emacs 19 distribution. -In the perl source directory, you'll find a directory called "emacs", +In the Perl source directory, you'll find a directory called "emacs", which contains a cperl-mode that color-codes keywords, provides context-sensitive help, and other nifty things. Note that the perl-mode of emacs will have fits with C<"main'foo"> -(single quote), and mess up the indentation and hilighting. You +(single quote), and mess up the indentation and highlighting. You are probably using C<"main::foo"> in new Perl code anyway, so this shouldn't be an issue. @@ -212,11 +224,11 @@ to the Athena Widget set. Both are available from CPAN. 
See the directory http://www.perl.com/CPAN/modules/by-category/08_User_Interfaces/ Invaluable for Perl/Tk programming are: the Perl/Tk FAQ at -http://w4.lns.cornell.edu/~pvhp/ptk/ptkTOC.html , the Perl/Tk Reference +http://w4.lns.cornell.edu/%7Epvhp/ptk/ptkTOC.html , the Perl/Tk Reference Guide available at http://www.perl.com/CPAN-local/authors/Stephen_O_Lidie/ , and the online manpages at -http://www-users.cs.umn.edu/~amundson/perl/perltk/toc.html . +http://www-users.cs.umn.edu/%7Eamundson/perl/perltk/toc.html . =head2 How can I generate simple menus without using CGI or Tk? @@ -261,9 +273,9 @@ it. See the F<INSTALL> file in the source distribution for more information. Unsubstantiated reports allege that Perl interpreters that use sfio -outperform those that don't (for IO intensive applications). To try +outperform those that don't (for I/O intensive applications). To try this, see the F<INSTALL> file in the source distribution, especially -the ``Selecting File IO mechanisms'' section. +the ``Selecting File I/O mechanisms'' section. The undump program was an old attempt to speed up your Perl program by storing the already-compiled form to disk. This is no longer @@ -358,21 +370,21 @@ anything a module written in C can. For more on mod_perl, see http://perl.apache.org/ With the FCGI module (from CPAN) and the mod_fastcgi -module (available from http://www.fastcgi.com/) each of your perl -scripts becomes a permanent CGI daemon process. +module (available from http://www.fastcgi.com/) each of your Perl +programs becomes a permanent CGI daemon process. Both of these solutions can have far-reaching effects on your system -and on the way you write your CGI scripts, so investigate them with +and on the way you write your CGI programs, so investigate them with care. See http://www.perl.com/CPAN/modules/by-category/15_World_Wide_Web_HTML_HTTP_CGI/ . 
A non-free, commercial product, ``The Velocity Engine for Perl'', -(http://www.binevolve.com/ or +(http://www.binevolve.com/ or http://www.binevolve.com/velocigen/) might also be worth looking at. It will allow you to increase the performance -of your perl scripts, upto 25 times faster than normal CGI perl by -running in persistent perl mode, or 4 to 5 times faster without any -modification to your existing CGI scripts. Fully functional evaluation +of your Perl programs, up to 25 times faster than normal CGI Perl by +running in persistent Perl mode, or 4 to 5 times faster without any +modification to your existing CGI programs. Fully functional evaluation copies are available from the web site. =head2 How can I hide the source for my Perl program? @@ -404,12 +416,12 @@ your code, but none can definitively conceal it (this is true of every language, not just Perl). If you're concerned about people profiting from your code, then the -bottom line is that nothing but a restrictive licence will give you +bottom line is that nothing but a restrictive license will give you legal security. License your software and pepper it with threatening statements like ``This is unpublished proprietary software of XYZ Corp. Your access to it does not give you permission to use it blah blah blah.'' We are not lawyers, of course, so you should see a lawyer if -you want to be sure your licence's wording will stand up in court. +you want to be sure your license's wording will stand up in court. =head2 How can I compile my Perl program into byte code or C? @@ -434,8 +446,8 @@ just as big as the original perl executable, and then some. That's because as currently written, all programs are prepared for a full eval() statement. You can tremendously reduce this cost by building a shared I<libperl.so> library and linking against that. See the -F<INSTALL> podfile in the perl source distribution for details. If -you link your main perl binary with this, it will make it miniscule. 
+F<INSTALL> podfile in the Perl source distribution for details. If +you link your main perl binary with this, it will make it minuscule. For example, on one author's system, F</usr/bin/perl> is only 11k in size! @@ -454,7 +466,7 @@ Perl install anyway. You can't. Not yet, anyway. You can integrate Java and Perl with the Perl Resource Kit from O'Reilly and Associates. See http://www.oreilly.com/catalog/prkunix/ for more information. -The Java interface will be supported in the core 5.006 release +The Java interface will be supported in the core 5.6 release of Perl. =head2 How can I get C<#!perl> to work on [MS-DOS,NT,...]? @@ -470,23 +482,22 @@ F<INSTALL> file in the source distribution for more information). The Win95/NT installation, when using the ActiveState port of Perl, will modify the Registry to associate the C<.pl> extension with the -perl interpreter. If you install another port (Gurusamy Sarathy's is -the recommended Win95/NT port), or (eventually) build your own -Win95/NT Perl using a Windows port of gcc (e.g., with cygwin32 or -mingw32), then you'll have to modify the Registry yourself. In -addition to associating C<.pl> with the interpreter, NT people can -use: C<SET PATHEXT=%PATHEXT%;.PL> to let them run the program -C<install-linux.pl> merely by typing C<install-linux>. +perl interpreter. If you install another port, perhaps even building +your own Win95/NT Perl from the standard sources by using a Windows port +of gcc (e.g., with cygwin or mingw32), then you'll have to modify +the Registry yourself. In addition to associating C<.pl> with the +interpreter, NT people can use: C<SET PATHEXT=%PATHEXT%;.PL> to let them +run the program C<install-linux.pl> merely by typing C<install-linux>. -Macintosh perl scripts will have the appropriate Creator and -Type, so that double-clicking them will invoke the perl application. +Macintosh Perl programs will have the appropriate Creator and +Type, so that double-clicking them will invoke the Perl application. 
I<IMPORTANT!>: Whatever you do, PLEASE don't get frustrated, and just throw the perl interpreter into your cgi-bin directory, in order to -get your scripts working for a web server. This is an EXTREMELY big +get your programs working for a web server. This is an EXTREMELY big security risk. Take the time to figure out how to do it correctly. -=head2 Can I write useful perl programs on the command line? +=head2 Can I write useful Perl programs on the command line? Yes. Read L<perlrun> for more information. Some examples follow. (These assume standard Unix shell quoting rules.) @@ -510,9 +521,9 @@ Yes. Read L<perlrun> for more information. Some examples follow. echo $PATH | perl -nl -072 -e ' s![^/+]*$!man!&&-d&&!$s{$_}++&&push@m,$_;END{print"@m"}' -Ok, the last one was actually an obfuscated perl entry. :-) +OK, the last one was actually an Obfuscated Perl Contest entry. :-) -=head2 Why don't perl one-liners work on my DOS/Mac/VMS system? +=head2 Why don't Perl one-liners work on my DOS/Mac/VMS system? The problem is usually that the command interpreters on those systems have rather different ideas about quoting than the Unix shells under @@ -570,7 +581,7 @@ when it runs fine on the command line'', see these sources: http://www.boutell.com/faq/ CGI FAQ - http://www.webthing.com/tutorials/cgifaq.html + http://www.webthing.com/tutorials/cgifaq.html HTTP Spec http://www.w3.org/pub/WWW/Protocols/HTTP/ @@ -585,11 +596,10 @@ when it runs fine on the command line'', see these sources: CGI Security FAQ http://www.go2net.com/people/paulp/cgi-security/safe-cgi.txt -Also take a look at L<perlfaq9> =head2 Where can I learn about object-oriented Perl programming? -L<perltoot> is a good place to start, and you can use L<perlobj> and +A good place to start is L<perltoot>, and you can use L<perlobj> and L<perlbot> for reference. Perltoot didn't come out until the 5.004 release, but you can get a copy (in pod, html, or postscript) from http://www.perl.com/CPAN/doc/FMTEYEWTK/ . 
@@ -608,15 +618,15 @@ my C program, what am I doing wrong? Download the ExtUtils::Embed kit from CPAN and run `make test'. If the tests pass, read the pods again and again and again. If they -fail, see L<perlbug> and send a bugreport with the output of +fail, see L<perlbug> and send a bug report with the output of C<make test TEST_VERBOSE=1> along with C<perl -V>. =head2 When I tried to run my script, I got this message. What does it mean? -L<perldiag> has a complete list of perl's error messages and warnings, -with explanatory text. You can also use the splain program (distributed -with perl) to explain the error messages: +A complete list of Perl's error messages and warnings with explanatory +text can be found in L<perldiag>. You can also use the splain program +(distributed with Perl) to explain the error messages: perl program 2>diag.out splain [-v] [-p] diag.out @@ -631,7 +641,7 @@ or =head2 What's MakeMaker? -This module (part of the standard perl distribution) is designed to +This module (part of the standard Perl distribution) is designed to write a Makefile for an extension module from a Makefile.PL. For more information, see L<ExtUtils::MakeMaker>. @@ -641,13 +651,12 @@ Copyright (c) 1997-1999 Tom Christiansen and Nathan Torkington. All rights reserved. When included as an integrated part of the Standard Distribution -of Perl or of its documentation (printed or otherwise), this work is -covered under Perl's Artistic Licence. For separate distributions of +of Perl or of its documentation (printed or otherwise), this work is +covered under Perl's Artistic License. For separate distributions of all or part of this FAQ outside of that, see L<perlfaq>. -Irrespective of its distribution, all code examples here are public +Irrespective of its distribution, all code examples here are in the public domain. You are permitted and encouraged to use this code and any derivatives thereof in your own programs for fun or for profit as you see fit. 
A simple comment in the code giving credit to the FAQ would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq4.pod b/contrib/perl5/pod/perlfaq4.pod index 92aee2c..e997a8f 100644 --- a/contrib/perl5/pod/perlfaq4.pod +++ b/contrib/perl5/pod/perlfaq4.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq4 - Data Manipulation ($Revision: 1.40 $, $Date: 1999/01/08 04:26:39 $) +perlfaq4 - Data Manipulation ($Revision: 1.49 $, $Date: 1999/05/23 20:37:49 $) =head1 DESCRIPTION @@ -31,7 +31,7 @@ representation is converted back to decimal. These decimal numbers are displayed in either the format you specify with printf(), or the current output format for numbers (see L<perlvar/"$#"> if you use print. C<$#> has a different default value in Perl5 than it did in -Perl4. Changing C<$#> yourself is deprecated. +Perl4. Changing C<$#> yourself is deprecated.) This affects B<all> computer languages that represent decimal floating-point numbers in binary, not just Perl. Perl provides @@ -67,7 +67,7 @@ route. printf("%.3f", 3.1415926535); # prints 3.142 -The POSIX module (part of the standard perl distribution) implements +The POSIX module (part of the standard Perl distribution) implements ceil(), floor(), and a number of other mathematical and trigonometric functions. @@ -76,7 +76,7 @@ functions. $floor = floor(3.5); # 3 In 5.000 to 5.003 Perls, trigonometry was done in the Math::Complex -module. With 5.004, the Math::Trig module (part of the standard perl +module. With 5.004, the Math::Trig module (part of the standard Perl distribution) implements the trigonometric functions. Internally it uses the Math::Complex module and some functions can break out from the real axis into the complex plane, for example the inverse sine of @@ -104,14 +104,21 @@ are not guaranteed. =head2 How do I convert bits into ints? 
To turn a string of 1s and 0s like C<10110110> into a scalar containing -its binary value, use the pack() function (documented in -L<perlfunc/"pack">): +its binary value, use the pack() and unpack() functions (documented in +L<perlfunc/"pack"> and L<perlfunc/"unpack">): - $decimal = pack('B8', '10110110'); + $decimal = unpack('c', pack('B8', '10110110')); + +This packs the string C<10110110> into an eight bit binary structure. +This is then unpacked as a character, which returns its ordinal value. + +This does the same thing: + + $decimal = ord(pack('B8', '10110110')); Here's an example of going the other way: - $binary_string = join('', unpack('B*', "\x29")); + $binary_string = unpack('B*', "\x29"); =head2 Why doesn't & work the way I want it to? @@ -176,6 +183,15 @@ ranges. Instead use: push(@results, some_func($i)); } +This situation has been fixed in Perl5.005. Use of C<..> in a C<for> +loop will iterate over the range, without creating the entire range. + + for my $i (5 .. 500_005) { + push(@results, some_func($i)); + } + +will not create a list of 500,000 integers. + =head2 How can I output Roman numerals? Get the http://www.perl.com/CPAN/modules/by-module/Roman module. @@ -228,12 +244,34 @@ American businesses often consider the first week with a Monday in it to be Work Week #1, despite ISO 8601, which considers WW1 to be the first week with a Thursday in it. +=head2 How do I find the current century or millennium? + +Use the following simple functions: + + sub get_century { + return int((((localtime(shift || time))[5] + 1999))/100); + } + sub get_millennium { + return 1+int((((localtime(shift || time))[5] + 1899))/1000); + } + +On some systems, you'll find that the POSIX module's strftime() function +has been extended in a non-standard way to use a C<%C> format, which they +sometimes claim is the "century". 
It isn't, because on most such systems, +this is only the first two digits of the four-digit year, and thus cannot +be used to reliably determine the current century or millennium. + =head2 How can I compare two dates and find the difference? If you're storing your dates as epoch seconds then simply subtract one from the other. If you've got a structured date (distinct year, day, -month, hour, minute, seconds values) then use one of the Date::Manip -and Date::Calc modules from CPAN. +month, hour, minute, seconds values), then for reasons of accessibility, +simplicity, and efficiency, merely use either timelocal or timegm (from +the Time::Local module in the standard distribution) to reduce structured +dates to epoch seconds. However, if you don't know the precise format of +your dates, then you should probably use either of the Date::Manip and +Date::Calc modules from CPAN before you go hacking up your own parsing +routine to handle arbitrary date formats. =head2 How can I take a string and turn it into epoch seconds? @@ -244,23 +282,81 @@ and Date::Manip modules from CPAN. =head2 How can I find the Julian Day? -Neither Date::Manip nor Date::Calc deal with Julian days. Instead, -there is an example of Julian date calculation that should help you in -Time::JulianDay (part of the Time-modules bundle) which can be found at -http://www.perl.com/CPAN/modules/by-module/Time/. - +Use the Time::JulianDay module (part of the Time-modules bundle +available from CPAN.) + +Before you immerse yourself too deeply in this, be sure to verify that it +is the I<Julian> Day you really want. Are they really just interested in +a way of getting serial days so that they can do date arithmetic? If you +are interested in performing date arithmetic, this can be done using +either Date::Manip or Date::Calc, without converting to Julian Day first. 
+ +There is too much confusion on this issue to cover in this FAQ, but the +term is applied (correctly) to a calendar now supplanted by the Gregorian +Calendar, with the Julian Calendar failing to adjust properly for leap +years on centennial years (among other annoyances). The term is also used +(incorrectly) to mean: [1] days in the Gregorian Calendar; and [2] days +since a particular starting time or `epoch', usually 1970 in the Unix +world and 1980 in the MS-DOS/Windows world. If you find that it is not +the first meaning that you really want, then check out the Date::Manip +and Date::Calc modules. (Thanks to David Cassell for most of this text.) =head2 How do I find yesterday's date? The C<time()> function returns the current time in seconds since the -epoch. Take one day off that: +epoch. Take twenty-four hours off that: $yesterday = time() - ( 24 * 60 * 60 ); Then you can pass this to C<localtime()> and get the individual year, month, day, hour, minute, seconds values. -=head2 Does Perl have a year 2000 problem? Is Perl Y2K compliant? +Note very carefully that the code above assumes that your days are +twenty-four hours each. For most people, there are two days a year +when they aren't: the switch to and from summer time throws this off. +A solution to this issue is offered by Russ Allbery. + + sub yesterday { + my $now = defined $_[0] ? $_[0] : time; + my $then = $now - 60 * 60 * 24; + my $ndst = (localtime $now)[8] > 0; + my $tdst = (localtime $then)[8] > 0; + $then - ($tdst - $ndst) * 60 * 60; + } + # Should give you "this time yesterday" in seconds since epoch relative to + # the first argument or the current time if no argument is given and + # suitable for passing to localtime or whatever else you need to do with + # it. $ndst is whether we're currently in daylight savings time; $tdst is + # whether the point 24 hours ago was in daylight savings time. 
If $tdst + # and $ndst are the same, a boundary wasn't crossed, and the correction + # will subtract 0. If $tdst is 1 and $ndst is 0, subtract an hour more + # from yesterday's time since we gained an extra hour while going off + # daylight savings time. If $tdst is 0 and $ndst is 1, subtract a + # negative hour (add an hour) to yesterday's time since we lost an hour. + # + # All of this is because during those days when one switches off or onto + # DST, a "day" isn't 24 hours long; it's either 23 or 25. + # + # The explicit settings of $ndst and $tdst are necessary because localtime + # only says it returns the system tm struct, and the system tm struct at + # least on Solaris doesn't guarantee any particular positive value (like, + # say, 1) for isdst, just a positive value. And that value can + # potentially be negative, if DST information isn't available (this sub + # just treats those cases like no DST). + # + # Note that between 2am and 3am on the day after the time zone switches + # off daylight savings time, the exact hour of "yesterday" corresponding + # to the current hour is not clearly defined. Note also that if used + # between 2am and 3am the day after the change to daylight savings time, + # the result will be between 3am and 4am of the previous day; it's + # arguable whether this is correct. + # + # This sub does not attempt to deal with leap seconds (most things don't). + # + # Copyright relinquished 1999 by Russ Allbery <rra@stanford.edu> + # This code is in the public domain + +=head2 Does Perl have a Year 2000 problem? Is Perl Y2K compliant? Short answer: No, Perl does not have a Year 2000 problem. Yes, Perl is Y2K compliant (whatever that means). The programmers you've hired to @@ -271,7 +367,7 @@ Perl is just as Y2K compliant as your pencil--no more, and no less. Can you use your pencil to write a non-Y2K-compliant memo? Of course you can. Is that the pencil's fault? Of course it isn't. 
-The date and time functions supplied with perl (gmtime and localtime) +The date and time functions supplied with Perl (gmtime and localtime) supply adequate information to determine the year well beyond 2000 (2038 is when trouble strikes for 32-bit machines). The year returned by these functions when used in an array context is the year minus 1900. @@ -312,7 +408,11 @@ This won't expand C<"\n"> or C<"\t"> or any other special escapes. To turn C<"abbcccd"> into C<"abccd">: - s/(.)\1/$1/g; + s/(.)\1/$1/g; # add /s to include newlines + +Here's a solution that turns "abbcccd" to "abcd": + + y///cs; # y == tr, but shorter :-) =head2 How do I expand function calls in a string? @@ -344,16 +444,14 @@ nested patterns, nor can they. For that you'll have to write a parser. If you are serious about writing a parser, there are a number of -modules or oddities that will make your life a lot easier. There is -the CPAN module Parse::RecDescent, the standard module Text::Balanced, -the byacc program, the CPAN module Parse::Yapp, and Mark-Jason -Dominus's excellent I<py> tool at http://www.plover.com/~mjd/perl/py/ -. +modules or oddities that will make your life a lot easier. There are +the CPAN modules Parse::RecDescent, Parse::Yapp, and Text::Balanced; +and the byacc program. One simple destructive, inside-out approach that you might try is to pull out the smallest nesting parts one at a time: - while (s//BEGIN((?:(?!BEGIN)(?!END).)*)END/gs) { + while (s/BEGIN((?:(?!BEGIN)(?!END).)*)END//gs) { # do something with $1 } @@ -365,7 +463,7 @@ really does work: # $_ contains the string to parse # BEGIN and END are the opening and closing markers for the # nested text. 
- + @( = ('(',''); @) = (')',''); ($re=$_)=~s/((BEGIN)|(END)|.)/$)[!$3]\Q$1\E$([!$2]/gs; @@ -385,7 +483,7 @@ You can do it yourself: 1 while $string =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; -Or you can just use the Text::Tabs module (part of the standard perl +Or you can just use the Text::Tabs module (part of the standard Perl distribution). use Text::Tabs; @@ -393,7 +491,7 @@ distribution). =head2 How do I reformat a paragraph? -Use Text::Wrap (part of the standard perl distribution): +Use Text::Wrap (part of the standard Perl distribution): use Text::Wrap; print wrap("\t", ' ', @paragraphs); @@ -422,24 +520,25 @@ likely prefer: You have to keep track of N yourself. For example, let's say you want to change the fifth occurrence of C<"whoever"> or C<"whomever"> into -C<"whosoever"> or C<"whomsoever">, case insensitively. +C<"whosoever"> or C<"whomsoever">, case insensitively. These +all assume that $_ contains the string to be altered. $count = 0; s{((whom?)ever)}{ ++$count == 5 # is it the 5th? ? "${2}soever" # yes, swap : $1 # renege and leave it there - }igex; + }ige; In the more general case, you can use the C</g> modifier in a C<while> loop, keeping count of matches. $WANT = 3; $count = 0; + $_ = "One fish two fish red fish blue fish"; while (/(\w+)\s+fish\b/gi) { if (++$count == $WANT) { print "The third fish is a $1 one.\n"; - # Warning: don't `last' out of this loop } } @@ -456,7 +555,7 @@ C<tr///> function like so: $string = "ThisXlineXhasXsomeXx'sXinXit"; $count = ($string =~ tr/X//); - print "There are $count X charcters in the string"; + print "There are $count X characters in the string"; This is fine if you are just looking for a single character. However, if you are trying to count multiple character substrings within a @@ -475,8 +574,8 @@ To make the first letter of each word upper case: $line =~ s/\b(\w)/\U$1/g; This has the strange effect of turning "C<don't do it>" into "C<Don'T -Do It>". 
Sometimes you might want this, instead (Suggested by Brian -Foy): +Do It>". Sometimes you might want this, instead (Suggested by brian d. +foy): $string =~ s/ ( (^\w) #at the beginning of the line @@ -499,7 +598,7 @@ characters by placing a C<use locale> pragma in your program. See L<perllocale> for endless details on locales. This is sometimes referred to as putting something into "title -case", but that's not quite accurate. Consdier the proper +case", but that's not quite accurate. Consider the proper capitalization of the movie I<Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb>, for example. @@ -532,7 +631,7 @@ quotation-mark-delimited field, escape them with backslashes (eg, C<"like \"this\"">. Unescaping them is a task addressed earlier in this section. -Alternatively, the Text::ParseWords module (part of the standard perl +Alternatively, the Text::ParseWords module (part of the standard Perl distribution) lets you say: use Text::ParseWords; @@ -546,8 +645,8 @@ Although the simplest approach would seem to be: $string =~ s/^\s*(.*?)\s*$/$1/; -This is unnecessarily slow, destructive, and fails with embedded newlines. -It is much better faster to do this in two steps: +Not only is this unnecessarily slow and destructive, it also fails with +embedded newlines. It is much faster to do this operation in two steps: $string =~ s/^\s+//; $string =~ s/\s+$//; @@ -562,7 +661,7 @@ Or more nicely written as: This idiom takes advantage of the C<foreach> loop's aliasing behavior to factor out common code. You can do this on several strings at once, or arrays, or even the -values of a hash if you use a slide: +values of a hash if you use a slice: # trim whitespace in the scalar, the array, # and all the values in the hash @@ -573,41 +672,48 @@ values of a hash if you use a slide: =head2 How do I pad a string with blanks or pad a number with zeroes? 
-(This answer contributed by Uri Guttman) +(This answer contributed by Uri Guttman, with kibitzing from +Bart Lateur.) In the following examples, C<$pad_len> is the length to which you wish -to pad the string, C<$text> or C<$num> contains the string to be -padded, and C<$pad_char> contains the padding character. You can use a -single character string constant instead of the C<$pad_char> variable -if you know what it is in advance. +to pad the string, C<$text> or C<$num> contains the string to be padded, +and C<$pad_char> contains the padding character. You can use a single +character string constant instead of the C<$pad_char> variable if you +know what it is in advance. And in the same way you can use an integer in +place of C<$pad_len> if you know the pad length in advance. -The simplest method use the C<sprintf> function. It can pad on the -left or right with blanks and on the left with zeroes. +The simplest method uses the C<sprintf> function. It can pad on the left +or right with blanks and on the left with zeroes and it will not +truncate the result. The C<pack> function can only pad strings on the +right with blanks and it will truncate the result to a maximum length of +C<$pad_len>. - # Left padding with blank: - $padded = sprintf( "%${pad_len}s", $text ) ; + # Left padding a string with blanks (no truncation): + $padded = sprintf("%${pad_len}s", $text); - # Right padding with blank: - $padded = sprintf( "%${pad_len}s", $text ) ; + # Right padding a string with blanks (no truncation): + $padded = sprintf("%-${pad_len}s", $text); - # Left padding with 0: - $padded = sprintf( "%0${pad_len}d", $num ) ; + # Left padding a number with 0 (no truncation): + $padded = sprintf("%0${pad_len}d", $num); -If you need to pad with a character other than blank or zero you can use -one of the following methods. 
+ # Right padding a string with blanks using pack (will truncate): + $padded = pack("A$pad_len",$text); -These methods generate a pad string with the C<x> operator and -concatenate that with the original text. +If you need to pad with a character other than blank or zero you can use +one of the following methods. They all generate a pad string with the +C<x> operator and combine that with C<$text>. These methods do +not truncate C<$text>. -Left and right padding with any character: +Left and right padding with any character, creating a new string: - $padded = $pad_char x ( $pad_len - length( $text ) ) . $text ; - $padded = $text . $pad_char x ( $pad_len - length( $text ) ) ; + $padded = $pad_char x ( $pad_len - length( $text ) ) . $text; + $padded = $text . $pad_char x ( $pad_len - length( $text ) ); -Or you can left or right pad $text directly: +Left and right padding with any character, modifying C<$text> directly: - $text .= $pad_char x ( $pad_len - length( $text ) ) ; - substr( $text, 0, 0 ) = $pad_char x ( $pad_len - length( $text ) ) ; + substr( $text, 0, 0 ) = $pad_char x ( $pad_len - length( $text ) ); + $text .= $pad_char x ( $pad_len - length( $text ) ); =head2 How do I extract selected columns from a string? @@ -633,7 +739,14 @@ you can use this kind of thing: =head2 How do I find the soundex value of a string? -Use the standard Text::Soundex module distributed with perl. +Use the standard Text::Soundex module distributed with Perl. +But before you do so, you may want to determine whether `soundex' is in +fact what you think it is. Knuth's soundex algorithm compresses words +into a small space, and so it does not necessarily distinguish between +two words which you might want to appear separately. For example, the +last names `Knuth' and `Kant' are both mapped to the soundex code K530. +If Text::Soundex does not do what you are looking for, you might want +to consider the String::Approx module available at CPAN. 
=head2 How can I expand variables in text strings? @@ -706,7 +819,7 @@ Stringification also destroys arrays. print "@lines"; # WRONG - extra blanks print @lines; # right -=head2 Why don't my E<lt>E<lt>HERE documents work? +=head2 Why don't my <<HERE documents work? Check for these three things: @@ -767,7 +880,7 @@ This works with leading special strings, dynamically determined: @@@ runops() { @@@ SAVEI32(runlevel); @@@ runlevel++; - @@@ while ( op = (*op->op_ppaddr)() ) ; + @@@ while ( op = (*op->op_ppaddr)() ); @@@ TAINT_NOT; @@@ return 0; @@@ } @@ -805,9 +918,9 @@ When you say $scalar = (2, 5, 7, 9); -you're using the comma operator in scalar context, so it evaluates the -left hand side, then evaluates and returns the left hand side. This -causes the last value to be returned: 9. +you're using the comma operator in scalar context, so it uses the scalar +comma operator. There never was a list there at all! This causes the +last value to be returned: 9. =head2 What is the difference between $array[1] and @array[1]? @@ -825,9 +938,10 @@ with @bad[0] = `same program that outputs several lines`; -The B<-w> flag will warn you about these matters. +The C<use warnings> pragma and the B<-w> flag will warn you about these +matters. -=head2 How can I extract just the unique elements of an array? +=head2 How can I remove duplicate elements from a list or array? There are several possible ways, depending on whether the array is ordered and whether you wish to preserve the ordering. @@ -843,7 +957,7 @@ ordered and whether you wish to preserve the ordering. This is nice in that it doesn't use much extra memory, simulating uniq(1)'s behavior of removing only adjacent duplicates. It's less nice in that it won't work with false values like undef, 0, or ""; -"0 but true" is ok, though. +"0 but true" is OK, though. 
=item b) If you don't know whether @in is sorted: @@ -864,7 +978,7 @@ nice in that it won't work with false values like undef, 0, or ""; undef @ary; @ary[@in] = @in; - @out = @ary; + @out = grep {defined} @ary; =back @@ -893,7 +1007,8 @@ array. This kind of an array will take up less space: @primes = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31); undef @is_tiny_prime; - for (@primes) { $is_tiny_prime[$_] = 1; } + for (@primes) { $is_tiny_prime[$_] = 1 } + # or simply @istiny_prime[@primes] = (1) x @primes; Now you check whether $is_tiny_prime[$some_number]. @@ -916,7 +1031,7 @@ or worse yet These are slow (checks every element even if the first matches), inefficient (same reason), and potentially buggy (what if there are -regexp characters in $whatever?). If you're only testing once, then +regex characters in $whatever?). If you're only testing once, then use: $is_there = 0; @@ -941,6 +1056,9 @@ each element is unique in a given array: push @{ $count{$element} > 1 ? \@intersection : \@difference }, $element; } +Note that this is the I<symmetric difference>, that is, all elements in +either A or in B, but not in both. Think of it as an xor operation. + =head2 How do I test whether two arrays or hashes are equal? The following code works for single-level arrays. It uses a stringwise @@ -951,7 +1069,7 @@ strings. Modify if you have other needs. sub compare_arrays { my ($first, $second) = @_; - local $^W = 0; # silence spurious -w undef complaints + no warnings; # silence spurious -w undef complaints return 0 unless @$first == @$second; for (my $i = 0; $i < @$first; $i++) { return 0 if $first->[$i] ne $second->[$i]; @@ -1008,7 +1126,7 @@ Now C<$found_index> has what you want. In general, you usually don't need a linked list in Perl, since with regular arrays, you can push and pop or shift and unshift at either end, or you can use splice to add and/or remove arbitrary number of elements at -arbitrary points. 
Both pop and shift are both O(1) operations on perl's +arbitrary points. Both pop and shift are both O(1) operations on Perl's dynamic arrays. In the absence of shifts and pops, push in general needs to reallocate on the order every log(N) times, and unshift will need to copy pointers each time. @@ -1078,7 +1196,7 @@ Use this: fisher_yates_shuffle( \@array ); # permutes @array in place -You've probably seen shuffling algorithms that works using splice, +You've probably seen shuffling algorithms that work using splice, randomly picking another element to swap the current element with: srand; @@ -1163,7 +1281,7 @@ Supply a comparison function to sort() (described in L<perlfunc/sort>): @list = sort { $a <=> $b } @list; The default sort function is cmp, string comparison, which would -sort C<(1, 2, 10)> into C<(1, 10, 2)>. C<E<lt>=E<gt>>, used above, is +sort C<(1, 2, 10)> into C<(1, 10, 2)>. C<< <=> >>, used above, is the numerical comparison operator. If you have a complicated function needed to pull out the part you @@ -1185,7 +1303,7 @@ that's come to be known as the Schwartzian Transform: @sorted = map { $_->[0] } sort { $a->[1] cmp $b->[1] } - map { [ $_, uc((/\d+\s*(\S+)/ )[0] ] } @data; + map { [ $_, uc( (/\d+\s*(\S+)/)[0]) ] } @data; If you need to sort on several fields, the following paradigm is useful. @@ -1311,7 +1429,19 @@ sorting the keys as shown in an earlier question. =head2 What happens if I add or remove keys from a hash while iterating over it? -Don't do that. +Don't do that. :-) + +[lwall] In Perl 4, you were not allowed to modify a hash at all while +iterating over it. In Perl 5 you can delete from it, but you still +can't add to it, because that might cause a doubling of the hash table, +in which half the entries get copied up to the new top half of the +table, at which point you've totally bamboozled the iterator code. 
+Even if the table doesn't double, there's no telling whether your new +entry will be inserted before or after the current iterator position. + +Either treasure up your changes and make them after the iterator finishes, +or use keys to fetch all the old keys at once, and iterate over the list +of keys. =head2 How do I look up a hash element by value? @@ -1327,8 +1457,13 @@ to use: $by_value{$value} = $key; } -If your hash could have repeated values, the methods above will only -find one of the associated keys. This may or may not worry you. +If your hash could have repeated values, the methods above will only find +one of the associated keys. This may or may not worry you. If it does +worry you, you can always reverse the hash into a hash of arrays instead: + + while (($key, $value) = each %by_key) { + push @{$key_list_by_value{$value}}, $key; + } =head2 How can I know how many entries are in a hash? @@ -1337,8 +1472,9 @@ take the scalar sense of the keys() function: $num_keys = scalar keys %hash; -In void context it just resets the iterator, which is faster -for tied hashes. +In void context, the keys() function just resets the iterator, which is +faster for tied hashes than would be iterating through the whole +hash, one key-value pair at a time. =head2 How do I sort a hash (optionally by value instead of key)? @@ -1396,7 +1532,7 @@ And these conditions hold $ary{'d'} is false defined $ary{'d'} is true defined $ary{'a'} is true - exists $ary{'a'} is true (perl5 only) + exists $ary{'a'} is true (Perl5 only) grep ($_ eq 'a', keys %ary) is true If you now say @@ -1420,7 +1556,7 @@ and these conditions now hold; changes in caps: $ary{'d'} is false defined $ary{'d'} is true defined $ary{'a'} is FALSE - exists $ary{'a'} is true (perl5 only) + exists $ary{'a'} is true (Perl5 only) grep ($_ eq 'a', keys %ary) is true Notice the last two: you have an undef value, but a defined key! 
@@ -1444,7 +1580,7 @@ and these conditions now hold; changes in caps: $ary{'d'} is false defined $ary{'d'} is true defined $ary{'a'} is false - exists $ary{'a'} is FALSE (perl5 only) + exists $ary{'a'} is FALSE (Perl5 only) grep ($_ eq 'a', keys %ary) is FALSE See, the whole entry is gone! @@ -1467,8 +1603,8 @@ re-enter it, the hash iterator has been reset. =head2 How can I get the unique keys from two hashes? -First you extract the keys from the hashes into arrays, and then solve -the uniquifying the array problem described above. For example: +First you extract the keys from the hashes into lists, then solve +the "removing duplicates" problem described above. For example: %seen = (); for $element (keys(%foo), keys(%bar)) { @@ -1520,7 +1656,7 @@ whether you store something there or not. That's because functions get scalars passed in by reference. If somefunc() modifies C<$_[0]>, it has to be ready to write it back into the caller's version. -This has been fixed as of perl5.004. +This has been fixed as of Perl5.004. Normally, merely accessing a key's value for a nonexistent key does I<not> cause that key to be forever there. This is different than @@ -1547,7 +1683,7 @@ in L<perltoot>. =head2 How can I use a reference as a hash key? You can't do this directly, but you could use the standard Tie::Refhash -module distributed with perl. +module distributed with Perl. =head1 Data: Misc @@ -1560,9 +1696,11 @@ this works fine (assuming the files are found): print "Your kernel is GNU-zip enabled!\n"; } -On some legacy systems, however, you have to play tedious games with -"text" versus "binary" files. See L<perlfunc/"binmode">, or the upcoming -L<perlopentut> manpage. +On less elegant (read: Byzantine) systems, however, you have +to play tedious games with "text" versus "binary" files. See +L<perlfunc/"binmode"> or L<perlopentut>. 
Most of these ancient-thinking +systems are curses out of Microsoft, who seem to be committed to putting +the backward into backward compatibility. If you're concerned about 8-bit ASCII data, then see L<perllocale>. @@ -1604,13 +1742,12 @@ if you just want to say, ``Is this a float?'' } } - sub is_numeric { defined &getnum } + sub is_numeric { defined getnum($_[0]) } -Or you could check out String::Scanf which can be found at -http://www.perl.com/CPAN/modules/by-module/String/. -The POSIX module (part of the standard Perl distribution) provides -the C<strtol> and C<strtod> for converting strings to double -and longs, respectively. +Or you could check out the String::Scanf module on CPAN instead. The +POSIX module (part of the standard Perl distribution) provides the +C<strtol> and C<strtod> for converting strings to double and longs, +respectively. =head2 How do I keep persistent data across program calls? @@ -1663,7 +1800,7 @@ All rights reserved. When included as part of the Standard Version of Perl, or as part of its complete documentation whether printed or otherwise, this work -may be distributed only under the terms of Perl's Artistic Licence. +may be distributed only under the terms of Perl's Artistic License. Any distribution of this file or derivatives thereof I<outside> of that package require that special arrangements be made with copyright holder. @@ -1673,4 +1810,3 @@ are hereby placed into the public domain. You are permitted and encouraged to use this code in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit would be courteous but is not required. 
- diff --git a/contrib/perl5/pod/perlfaq5.pod b/contrib/perl5/pod/perlfaq5.pod index 99c25b7..6ae7755 100644 --- a/contrib/perl5/pod/perlfaq5.pod +++ b/contrib/perl5/pod/perlfaq5.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq5 - Files and Formats ($Revision: 1.34 $, $Date: 1999/01/08 05:46:13 $) +perlfaq5 - Files and Formats ($Revision: 1.38 $, $Date: 1999/05/23 16:08:30 $) =head1 DESCRIPTION @@ -69,7 +69,7 @@ or even this: Note the bizarrely hardcoded carriage return and newline in their octal equivalents. This is the ONLY way (currently) to assure a proper flush -on all platforms, including Macintosh. That the way things work in +on all platforms, including Macintosh. That's the way things work in network programming: you really should specify the exact bit pattern on the network line terminator. In practice, C<"\n\n"> often works, but this is not portable. @@ -347,7 +347,7 @@ Then use any of those as you would a normal filehandle. Anywhere that Perl is expecting a filehandle, an indirect filehandle may be used instead. An indirect filehandle is just a scalar variable that contains a filehandle. Functions like C<print>, C<open>, C<seek>, or -the C<E<lt>FHE<gt>> diamond operator will accept either a read filehandle +the C<< <FH> >> diamond operator will accept either a read filehandle or a scalar variable containing one: ($ifh, $ofh, $efh) = (*STDIN, *STDOUT, *STDERR); @@ -407,7 +407,7 @@ calls doesn't work for the diamond operator. That's because it's a real operator, not just a function with a comma-less argument. Assuming you've been storing typeglobs in your structure as we did above, you can use the built-in function named C<readline> to reads a record just -as C<E<lt>E<gt>> does. Given the initialization shown above for @fd, this +as C<< <> >> does. Given the initialization shown above for @fd, this would work, but only because readline() require a typeglob. It doesn't work with objects or strings, which might be a bug we haven't fixed yet. 
@@ -463,7 +463,7 @@ whatever: =head2 How can I translate tildes (~) in a filename? -Use the E<lt>E<gt> (glob()) operator, documented in L<perlfunc>. This +Use the <> (glob()) operator, documented in L<perlfunc>. This requires that you have a shell installed that groks tildes, meaning csh or tcsh or (some versions of) ksh, and thus may have portability problems. The Glob::KGlob module (available from CPAN) gives more @@ -491,8 +491,12 @@ I<then> gives you read-write access: open(FH, "+> /path/name"); # WRONG (almost always) Whoops. You should instead use this, which will fail if the file -doesn't exist. Using "E<gt>" always clobbers or creates. -Using "E<lt>" never does either. The "+" doesn't change this. +doesn't exist. + + open(FH, "+< /path/name"); # open for update + +Using ">" always clobbers or creates. Using "<" never does +either. The "+" doesn't change this. Here are examples of many kinds of file opens. Those using sysopen() all assume @@ -550,19 +554,20 @@ be an atomic operation over NFS. That is, two processes might both successful create or unlink the same file! Therefore O_EXCL isn't so exclusive as you might wish. -See also the new L<perlopentut> if you have it (new for 5.006). +See also the new L<perlopentut> if you have it (new for 5.6). -=head2 Why do I sometimes get an "Argument list too long" when I use E<lt>*E<gt>? +=head2 Why do I sometimes get an "Argument list too long" when I use <*>? -The C<E<lt>E<gt>> operator performs a globbing operation (see above). -By default glob() forks csh(1) to do the actual glob expansion, but +The C<< <> >> operator performs a globbing operation (see above). +In Perl versions earlier than v5.6.0, the internal glob() operator forks +csh(1) to do the actual glob expansion, but csh can't handle more than 127 items and so gives the error message C<Argument list too long>. People who installed tcsh as csh won't have this problem, but their users may be surprised by it. 
-To get around this, either do the glob yourself with readdir() and -patterns, or use a module like Glob::KGlob, one that doesn't use the -shell to do globbing. This is expected to be fixed soon. +To get around this, either upgrade to Perl v5.6.0 or later, do the glob +yourself with readdir() and patterns, or use a module like Glob::KGlob, +one that doesn't use the shell to do globbing. =head2 Is there a leak/bug in glob()? @@ -571,7 +576,7 @@ use the glob() function or its angle-bracket alias in a scalar context, you may cause a leak and/or unpredictable behavior. It's best therefore to use glob() only in list context. -=head2 How can I open a file with a leading "E<gt>" or trailing blanks? +=head2 How can I open a file with a leading ">" or trailing blanks? Normally perl ignores trailing blanks in filenames, and interprets certain leading characters (or a trailing "|") to mean something @@ -602,14 +607,18 @@ It would be a lot clearer to use sysopen(), though: or die "can't open $badpath: $!"; For more information, see also the new L<perlopentut> if you have it -(new for 5.006). +(new for 5.6). =head2 How can I reliably rename a file? -Well, usually you just use Perl's rename() function. But that may -not work everywhere, in particular, renaming files across file systems. -If your operating system supports a mv(1) program or its moral equivalent, -this works: +Well, usually you just use Perl's rename() function. But that may not +work everywhere, in particular, renaming files across file systems. +Some sub-Unix systems have broken ports that corrupt the semantics of +rename() -- for example, WinNT does this right, but Win95 and Win98 +are broken. (The last two parts are not surprising, but the first is. :-) + +If your operating system supports a proper mv(1) program or its moral +equivalent, this works: rename($old, $new) or system("mv", $old, $new); @@ -643,14 +652,28 @@ filehandle be open for writing (or appending, or read/writing). 
=item 3 -Some versions of flock() can't lock files over a network (e.g. on NFS -file systems), so you'd need to force the use of fcntl(2) when you -build Perl. See the flock entry of L<perlfunc>, and the F<INSTALL> -file in the source distribution for information on building Perl to do -this. +Some versions of flock() can't lock files over a network (e.g. on NFS file +systems), so you'd need to force the use of fcntl(2) when you build Perl. +But even this is dubious at best. See the flock entry of L<perlfunc>, +and the F<INSTALL> file in the source distribution for information on +building Perl to do this. + +Two potentially non-obvious but traditional flock semantics are that +it waits indefinitely until the lock is granted, and that its locks are +I<merely advisory>. Such discretionary locks are more flexible, but +offer fewer guarantees. This means that files locked with flock() may +be modified by programs that do not also use flock(). Cars that stop +for red lights get on well with each other, but not with cars that don't +stop for red lights. See the perlport manpage, your port's specific +documentation, or your system-specific local manpages for details. It's +best to assume traditional behavior if you're writing portable programs. +(But if you're not, you should as always feel perfectly free to write +for your own system's idiosyncrasies (sometimes called "features"). +Slavish adherence to portability concerns shouldn't get in the way of +your getting your job done.) For more information on file locking, see also L<perlopentut/"File -Locking"> if you have it (new for 5.006). +Locking"> if you have it (new for 5.6). =back @@ -797,6 +820,58 @@ at http://www.perl.com/CPAN/authors/id/TOMC/scripts/tct.gz, which is written in Perl and offers much greater functionality than the stock version. +=head2 How can I read in an entire file all at once? 
+ +The customary Perl approach for processing all the lines in a file is to +do so one line at a time: + + open (INPUT, $file) || die "can't open $file: $!"; + while (<INPUT>) { + chomp; + # do something with $_ + } + close(INPUT) || die "can't close $file: $!"; + +This is tremendously more efficient than reading the entire file into +memory as an array of lines and then processing it one element at a time, +which is often -- if not almost always -- the wrong approach. Whenever +you see someone do this: + + @lines = <INPUT>; + +You should think long and hard about why you need everything loaded +at once. It's just not a scalable solution. You might also find it +more fun to use the standard DB_File module's $DB_RECNO bindings, +which allow you to tie an array to a file so that accessing an element of +the array actually accesses the corresponding line in the file. + +On very rare occasion, you may have an algorithm that demands that +the entire file be in memory at once as one scalar. The simplest solution +to that is: + + $var = `cat $file`; + +Being in scalar context, you get the whole thing. In list context, +you'd get a list of all the lines: + + @lines = `cat $file`; + +This tiny but expedient solution is neat, clean, and portable to +all systems on which decent tools have been installed. For those +who prefer not to use the toolbox, you can of course read the file +manually, although this makes for more complicated code. + + { + local(*INPUT, $/); + open (INPUT, $file) || die "can't open $file: $!"; + $var = <INPUT>; + } + +That temporarily undefs your record separator, and will automatically +close the file at block exit. If the file is already open, just use this: + + $var = do { local $/; <INPUT> }; + =head2 How can I read in a file by paragraphs? Use the C<$/> variable (see L<perlvar> for details). 
You can either @@ -1025,7 +1100,7 @@ Or even with a literal numeric descriptor: $fd = $ENV{MHCONTEXTFD}; open(MHCONTEXT, "<&=$fd"); # like fdopen(3S) -Note that "E<lt>&STDIN" makes a copy, but "E<lt>&=STDIN" make +Note that "<&STDIN" makes a copy, but "<&=STDIN" makes an alias. That means if you close an aliased handle, all aliases become inaccessible. This is not true with a copied one. @@ -1043,6 +1118,14 @@ to, you may be able to do this: $rc = syscall(&SYS_close, $fd + 0); # must force numeric die "can't sysclose $fd: $!" unless $rc == -1; +Or just use the fdopen(3S) feature of open(): + + { + local *F; + open F, "<&=$fd" or die "Cannot reopen fd=$fd: $!"; + close F; + } + =head2 Why can't I use "C:\temp\foo" in DOS paths? Why doesn't `C:\temp\foo.exe` work? Whoops! You just put a tab and a formfeed into that filename! @@ -1121,13 +1204,12 @@ Copyright (c) 1997-1999 Tom Christiansen and Nathan Torkington. All rights reserved. When included as an integrated part of the Standard Distribution -of Perl or of its documentation (printed or otherwise), this work is -covered under Perl's Artistic Licence. For separate distributions of +of Perl or of its documentation (printed or otherwise), this work is +covered under Perl's Artistic License. For separate distributions of all or part of this FAQ outside of that, see L<perlfaq>. -Irrespective of its distribution, all code examples here are public +Irrespective of its distribution, all code examples here are in the public domain. You are permitted and encouraged to use this code and any derivatives thereof in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit to the FAQ would be courteous but is not required. 
- diff --git a/contrib/perl5/pod/perlfaq6.pod b/contrib/perl5/pod/perlfaq6.pod index 234570d..bf007ee 100644 --- a/contrib/perl5/pod/perlfaq6.pod +++ b/contrib/perl5/pod/perlfaq6.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq6 - Regexps ($Revision: 1.25 $, $Date: 1999/01/08 04:50:47 $) +perlfaq6 - Regexes ($Revision: 1.27 $, $Date: 1999/05/23 16:08:30 $) =head1 DESCRIPTION @@ -18,7 +18,7 @@ understandable. =over 4 -=item Comments Outside the Regexp +=item Comments Outside the Regex Describe what you're doing and how you're doing it, using normal Perl comments. @@ -27,9 +27,9 @@ comments. # number of characters on the rest of the line s/^(\w+)(.*)/ lc($1) . ":" . length($2) /meg; -=item Comments Inside the Regexp +=item Comments Inside the Regex -The C</x> modifier causes whitespace to be ignored in a regexp pattern +The C</x> modifier causes whitespace to be ignored in a regex pattern (except in a character class), and also allows you to use normal comments there, too. As you can imagine, whitespace and comments help a lot. @@ -177,11 +177,46 @@ appear within a certain time. =head2 How do I substitute case insensitively on the LHS, but preserving case on the RHS? -It depends on what you mean by "preserving case". The following -script makes the substitution have the same case, letter by letter, as -the original. If the substitution has more characters than the string -being substituted, the case of the last character is used for the rest -of the substitution. +Here's a lovely Perlish solution by Larry Rosler. It exploits +properties of bitwise xor on ASCII strings. + + $_= "this is a TEsT case"; + + $old = 'test'; + $new = 'success'; + + s{(\Q$old\E} + { uc $new | (uc $1 ^ $1) . + (uc(substr $1, -1) ^ substr $1, -1) x + (length($new) - length $1) + }egi; + + print; + +And here it is as a subroutine, modelled after the above: + + sub preserve_case($$) { + my ($old, $new) = @_; + my $mask = uc $old ^ $old; + + uc $new | $mask . 
+ substr($mask, -1) x (length($new) - length($old)) + } + + $a = "this is a TEsT case"; + $a =~ s/(test)/preserve_case($1, "success")/egi; + print "$a\n"; + +This prints: + + this is a SUcCESS case + +Just to show that C programmers can write C in any programming language, +if you prefer a more C-like solution, the following script makes the +substitution have the same case, letter by letter, as the original. +(It also happens to run about 240% slower than the Perlish solution runs.) +If the substitution has more characters than the string being substituted, +the case of the last character is used for the rest of the substitution. # Original by Nathan Torkington, massaged by Jeffrey Friedl # @@ -214,14 +249,6 @@ of the substitution. return $new; } - $a = "this is a TEsT case"; - $a =~ s/(test)/preserve_case($1, "success")/gie; - print "$a\n"; - -This prints: - - this is a SUcCESS case - =head2 How can I make C<\w> match national character sets? See L<perllocale>. @@ -232,41 +259,41 @@ One alphabetic character would be C</[^\W\d_]/>, no matter what locale you're in. Non-alphabetics would be C</[\W\d_]/> (assuming you don't consider an underscore a letter). -=head2 How can I quote a variable to use in a regexp? +=head2 How can I quote a variable to use in a regex? The Perl parser will expand $variable and @variable references in regular expressions unless the delimiter is a single quote. Remember, too, that the right-hand side of a C<s///> substitution is considered a double-quoted string (see L<perlop> for more details). Remember -also that any regexp special characters will be acted on unless you +also that any regex special characters will be acted on unless you precede the substitution with \Q. Here's an example: $string = "to die?"; $lhs = "die?"; - $rhs = "sleep no more"; + $rhs = "sleep, no more"; $string =~ s/\Q$lhs/$rhs/; # $string is now "to sleep no more" -Without the \Q, the regexp would also spuriously match "di". 
+Without the \Q, the regex would also spuriously match "di". =head2 What is C</o> really for? Using a variable in a regular expression match forces a re-evaluation (and perhaps recompilation) each time through. The C</o> modifier -locks in the regexp the first time it's used. This always happens in a +locks in the regex the first time it's used. This always happens in a constant regular expression, and in fact, the pattern was compiled into the internal format at the same time your entire program was. Use of C</o> is irrelevant unless variable interpolation is used in -the pattern, and if so, the regexp engine will neither know nor care +the pattern, and if so, the regex engine will neither know nor care whether the variables change after the pattern is evaluated the I<very first> time. C</o> is often used to gain an extra measure of efficiency by not performing subsequent evaluations when you know it won't matter (because you know the variables won't change), or more rarely, when -you don't want the regexp to notice if they do. +you don't want the regex to notice if they do. For example, here's a "paragrep" program: @@ -286,23 +313,66 @@ For example, this one-liner will work in many but not all cases. You see, it's too simple-minded for certain kinds of C programs, in particular, those with what appear to be comments in quoted strings. For that, you'd need something like this, -created by Jeffrey Friedl: +created by Jeffrey Friedl and later modified by Fred Curtis. $/ = undef; $_ = <>; - s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|\n+|.[^/"'\\]*)#$2#g; + s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#$2#gs print; This could, of course, be more legibly written with the C</x> modifier, adding -whitespace and comments. +whitespace and comments. Here it is expanded, courtesy of Fred Curtis. + + s{ + /\* ## Start of /* ... 
*/ comment + [^*]*\*+ ## Non-* followed by 1-or-more *'s + ( + [^/*][^*]*\*+ + )* ## 0-or-more things which don't start with / + ## but do end with '*' + / ## End of /* ... */ comment + + | ## OR various things which aren't comments: + + ( + " ## Start of " ... " string + ( + \\. ## Escaped char + | ## OR + [^"\\] ## Non "\ + )* + " ## End of " ... " string + + | ## OR + + ' ## Start of ' ... ' string + ( + \\. ## Escaped char + | ## OR + [^'\\] ## Non '\ + )* + ' ## End of ' ... ' string + + | ## OR + + . ## Anything other char + [^/"'\\]* ## Chars which doesn't start a comment, string or escape + ) + }{$2}gxs; + +A slight modification also removes C++ comments: + + s#/\*[^*]*\*+([^/*][^*]*\*+)*/|//[^\n]*|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#$2#gs; =head2 Can I use Perl regular expressions to match balanced text? Although Perl regular expressions are more powerful than "mathematical" regular expressions, because they feature conveniences like backreferences -(C<\1> and its ilk), they still aren't powerful enough. You still need -to use non-regexp techniques to parse balanced text, such as the text -enclosed between matching parentheses or braces, for example. +(C<\1> and its ilk), they still aren't powerful enough -- with +the possible exception of bizarre and experimental features in the +development-track releases of Perl. You still need to use non-regex +techniques to parse balanced text, such as the text enclosed between +matching parentheses or braces, for example. An elaborate subroutine (for 7-bit ASCII only) to pull out balanced and possibly nested single chars, like C<`> and C<'>, C<{> and C<}>, @@ -312,9 +382,9 @@ http://www.perl.com/CPAN/authors/id/TOMC/scripts/pull_quotes.gz . The C::Scan module from CPAN contains such subs for internal usage, but they are undocumented. -=head2 What does it mean that regexps are greedy? How can I get around it? +=head2 What does it mean that regexes are greedy? How can I get around it? 
-Most people mean that greedy regexps match as much as they can. +Most people mean that greedy regexes match as much as they can. Technically speaking, it's actually the quantifiers (C<?>, C<*>, C<+>, C<{}>) that are greedy rather than the whole pattern; Perl prefers local greed and immediate gratification to overall greed. To get non-greedy @@ -422,7 +492,7 @@ characters. Neither is correct. C<\b> is the place between a C<\w> character and a C<\W> character (that is, C<\b> is the edge of a "word"). It's a zero-width assertion, just like C<^>, C<$>, and all the other anchors, so it doesn't consume any characters. L<perlre> -describes the behaviour of all the regexp metacharacters. +describes the behavior of all the regex metacharacters. Here are examples of the incorrect application of C<\b>, with fixes: @@ -446,8 +516,8 @@ not "this" or "island". Because once Perl sees that you need one of these variables anywhere in the program, it has to provide them on each and every pattern match. The same mechanism that handles these provides for the use of $1, $2, -etc., so you pay the same price for each regexp that contains capturing -parentheses. But if you never use $&, etc., in your script, then regexps +etc., so you pay the same price for each regex that contains capturing +parentheses. But if you never use $&, etc., in your script, then regexes I<without> capturing parentheses won't be penalized. So avoid $&, $', and $` if you can, but if you can't, once you've used them at all, use them at will because you've already paid the price. Remember that some @@ -463,8 +533,8 @@ pos() point. A failed match resets the position of C<\G> unless the C</c> modifier is in effect. For example, suppose you had a line of text quoted in standard mail -and Usenet notation, (that is, with leading C<E<gt>> characters), and -you want change each leading C<E<gt>> into a corresponding C<:>. 
You +and Usenet notation, (that is, with leading C<< > >> characters), and +you want to change each leading C<< > >> into a corresponding C<:>. You could do so in this way: s/^(>+)/':' x length($1)/gem; @@ -515,7 +585,7 @@ Of course, that could have been written as But then you lose the vertical alignment of the regular expressions. -=head2 Are Perl regexps DFAs or NFAs? Are they POSIX compliant? +=head2 Are Perl regexes DFAs or NFAs? Are they POSIX compliant? While it's true that Perl's regular expressions resemble the DFAs (deterministic finite automata) of the egrep(1) program, they are in @@ -620,7 +690,7 @@ All rights reserved. When included as part of the Standard Version of Perl, or as part of its complete documentation whether printed or otherwise, this work -may be distributed only under the terms of Perl's Artistic Licence. +may be distributed only under the terms of Perl's Artistic License. Any distribution of this file or derivatives thereof I<outside> of that package require that special arrangements be made with copyright holder. @@ -630,4 +700,3 @@ are hereby placed into the public domain. You are permitted and encouraged to use this code in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq7.pod b/contrib/perl5/pod/perlfaq7.pod index a4ea872..1ca7893 100644 --- a/contrib/perl5/pod/perlfaq7.pod +++ b/contrib/perl5/pod/perlfaq7.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq7 - Perl Language Issues ($Revision: 1.24 $, $Date: 1999/01/08 05:32:11 $) +perlfaq7 - Perl Language Issues ($Revision: 1.28 $, $Date: 1999/05/23 20:36:18 $) =head1 DESCRIPTION @@ -18,31 +18,29 @@ In the words of Chaim Frenkel: "Perl's grammar can not be reduced to BNF. The work of parsing perl is distributed between yacc, the lexer, smoke and mirrors." -=head2 What are all these $@%* punctuation signs, and how do I know when to use them? 
+=head2 What are all these $@%&* punctuation signs, and how do I know when to use them? They are type specifiers, as detailed in L<perldata>: $ for scalar values (number, string or reference) @ for arrays % for hashes (associative arrays) + & for subroutines (aka functions, procedures, methods) * for all types of that symbol name. In version 4 you used them like pointers, but in modern perls you can just use references. -While there are a few places where you don't actually need these type -specifiers, you should always use them. - A couple of others that you're likely to encounter that aren't really type specifiers are: <> are used for inputting a record from a filehandle. \ takes a reference to something. -Note that E<lt>FILEE<gt> is I<neither> the type specifier for files -nor the name of the handle. It is the C<E<lt>E<gt>> operator applied +Note that <FILE> is I<neither> the type specifier for files +nor the name of the handle. It is the C<< <> >> operator applied to the handle FILE. It reads one line (well, record - see L<perlvar/$/>) from the handle FILE in scalar context, or I<all> lines in list context. When performing open, close, or any other operation -besides C<E<lt>E<gt>> on files, or even talking about the handle, do +besides C<< <> >> on files, or even talking about the handle, do I<not> use the brackets. These are correct: C<eof(FH)>, C<seek(FH, 0, 2)> and "copying from STDIN to FILE". @@ -51,7 +49,7 @@ I<not> use the brackets. These are correct: C<eof(FH)>, C<seek(FH, 0, Normally, a bareword doesn't need to be quoted, but in most cases probably should be (and must be under C<use strict>). 
But a hash key consisting of a simple word (that isn't the name of a defined -subroutine) and the left-hand operand to the C<=E<gt>> operator both +subroutine) and the left-hand operand to the C<< => >> operator both count as though they were quoted: This is like this @@ -86,8 +84,17 @@ Another way is to use undef as an element on the left-hand-side: =head2 How do I temporarily block warnings? -The C<$^W> variable (documented in L<perlvar>) controls -runtime warnings for a block: +If you are running Perl 5.6.0 or better, the C<use warnings> pragma +allows fine control of what warnings are produced. +See L<perllexwarn> for more details. + + { + no warnings; # temporarily turn off warnings + $a = $b + $c; # I know these might be undef + } + +If you have an older version of Perl, the C<$^W> variable (documented +in L<perlvar>) controls runtime warnings for a block: { local $^W = 0; # temporarily turn off warnings @@ -97,10 +104,6 @@ runtime warnings for a block: Note that like all the punctuation variables, you cannot currently use my() on C<$^W>, only local(). -A new C<use warnings> pragma is in the works to provide finer control -over all this. The curious should check the perl5-porters mailing list -archives for details. - =head2 What's an extension? A way of calling compiled C code from Perl. Reading L<perlxstut> @@ -170,17 +173,18 @@ own module. Make sure to change the names appropriately. package Some::Module; # assumes Some/Module.pm use strict; + use warnings; BEGIN { use Exporter (); - use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); ## set the version for version checking; uncomment to use ## $VERSION = 1.00; # if using RCS/CVS, this next line may be preferred, # but beware two-digit versions. 
- $VERSION = do{my@r=q$Revision: 1.24 $=~/\d+/g;sprintf '%d.'.'%02d'x$#r,@r}; + $VERSION = do{my@r=q$Revision: 1.28 $=~/\d+/g;sprintf '%d.'.'%02d'x$#r,@r}; @ISA = qw(Exporter); @EXPORT = qw(&func1 &func2 &func3); @@ -190,10 +194,11 @@ own module. Make sure to change the names appropriately. # as well as any optionally exported functions @EXPORT_OK = qw($Var1 %Hashit); } - use vars @EXPORT_OK; + our @EXPORT_OK; # non-exported package globals go here - use vars qw( @more $stuff ); + our @more; + our $stuff; # initialize package globals, first exported ones $Var1 = ''; @@ -308,10 +313,10 @@ you want to pass in a bit of code into a function: my $line; timeout( 30, sub { $line = <STDIN> } ); -If the code to execute had been passed in as a string, C<'$line = -E<lt>STDINE<gt>'>, there would have been no way for the hypothetical -timeout() function to access the lexical variable $line back in its -caller's scope. +If the code to execute had been passed in as a string, +C<< '$line = <STDIN>' >>, there would have been no way for the +hypothetical timeout() function to access the lexical variable +$line back in its caller's scope. =head2 What is variable suicide and how can I prevent it? @@ -330,12 +335,13 @@ harder. Take this code: print "Finally $f\n"; The $f that has "bar" added to it three times should be a new C<$f> -(C<my $f> should create a new local variable each time through the -loop). It isn't, however. This is a bug, and will be fixed. +(C<my $f> should create a new local variable each time through the loop). +It isn't, however. This was a bug, now fixed in the latest releases +(tested against 5.004_05, 5.005_03, and 5.005_56). -=head2 How can I pass/return a {Function, FileHandle, Array, Hash, Method, Regexp}? +=head2 How can I pass/return a {Function, FileHandle, Array, Hash, Method, Regex}? -With the exception of regexps, you need to pass references to these +With the exception of regexes, you need to pass references to these objects. 
See L<perlsub/"Pass by Reference"> for this particular question, and L<perlref> for information on references. @@ -391,28 +397,42 @@ If you're planning on generating new filehandles, you could do this: $fh = openit('< /etc/motd'); print <$fh>; -=item Passing Regexps +=item Passing Regexes + +To pass regexes around, you'll need to be using a release of Perl +sufficiently recent as to support the C<qr//> construct, pass around +strings and use an exception-trapping eval, or else be very, very clever. + +Here's an example of how to pass in a string to be regex compared +using C<qr//>: + + sub compare($$) { + my ($val1, $regex) = @_; + my $retval = $val1 =~ /$regex/; + return $retval; + } + $match = compare("old McDonald", qr/d.*D/i); -To pass regexps around, you'll need to either use one of the highly -experimental regular expression modules from CPAN (Nick Ing-Simmons's -Regexp or Ilya Zakharevich's Devel::Regexp), pass around strings -and use an exception-trapping eval, or else be very, very clever. -Here's an example of how to pass in a string to be regexp compared: +Notice how C<qr//> allows flags at the end. That pattern was compiled +at compile time, although it was executed later. The nifty C<qr//> +notation wasn't introduced until the 5.005 release. Before that, you +had to approach this problem much less intuitively. 
For example, here +it is again if you don't have C<qr//>: sub compare($$) { - my ($val1, $regexp) = @_; - my $retval = eval { $val =~ /$regexp/ }; + my ($val1, $regex) = @_; + my $retval = eval { $val1 =~ /$regex/ }; die if $@; return $retval; } - $match = compare("old McDonald", q/d.*D/); + $match = compare("old McDonald", q/(?i)d.*D/); Make sure you never say something like this: - return eval "\$val =~ /$regexp/"; # WRONG + return eval "\$val =~ /$regex/"; # WRONG -or someone can sneak shell escapes into the regexp due to the double +or someone can sneak shell escapes into the regex due to the double interpolation of the eval and the double-quoted string. For example: $pattern_of_evil = 'danger ${ system("rm -rf * &") } danger'; @@ -567,10 +587,10 @@ However, dynamic variables (aka global, local, or package variables) are effectively shallowly bound. Consider this just one more reason not to use them. See the answer to L<"What's a closure?">. -=head2 Why doesn't "my($foo) = E<lt>FILEE<gt>;" work right? +=head2 Why doesn't "my($foo) = <FILE>;" work right? C<my()> and C<local()> give list context to the right hand side -of C<=>. The E<lt>FHE<gt> read operation, like so many of Perl's +of C<=>. The <FH> read operation, like so many of Perl's functions and operators, can tell which context it was called in and behaves appropriately. In general, the scalar() function can help. This function does nothing to the data itself (contrary to popular myth) @@ -597,7 +617,7 @@ Why do you want to do that? :-) If you want to override a predefined function, such as open(), then you'll have to import the new definition from a different -module. See L<perlsub/"Overriding Builtin Functions">. There's +module. See L<perlsub/"Overriding Built-in Functions">. There's also an example in L<perltoot/"Class::Template">. If you want to overload a Perl operator, such as C<+> or C<**>, @@ -630,7 +650,7 @@ where they don't belong. This is explained in more depth in the L<perlsyn>. 
Briefly, there's no official case statement, because of the variety of tests possible in Perl (numeric comparison, string comparison, glob comparison, -regexp matching, overloaded comparisons, ...). Larry couldn't decide +regex matching, overloaded comparisons, ...). Larry couldn't decide how best to do this, so he left it out, even though it's been on the wish list since perl1. @@ -752,7 +772,7 @@ before Perl has seen that such a package exists. It's wisest to make sure your packages are all defined before you start using them, which will be taken care of if you use the C<use> statement instead of C<require>. If not, make sure to use arrow notation (eg, -C<Guru-E<gt>find("Samy")>) instead. Object notation is explained in +C<< Guru->find("Samy") >>) instead. Object notation is explained in L<perlobj>. Make sure to read about creating modules in L<perlmod> and @@ -826,6 +846,106 @@ Use this code, provided by Mark-Jason Dominus: Or, if you're using a recent release of Perl, you can just use the Symbol::delete_package() function instead. +=head2 How can I use a variable as a variable name? + +Beginners often think they want to have a variable contain the name +of a variable. + + $fred = 23; + $varname = "fred"; + ++$$varname; # $fred now 24 + +This works I<sometimes>, but it is a very bad idea for two reasons. + +The first reason is that they I<only work on global variables>. +That means above that if $fred is a lexical variable created with my(), +that the code won't work at all: you'll accidentally access the global +and skip right over the private lexical altogether. Global variables +are bad because they can easily collide accidentally and in general make +for non-scalable and confusing code. + +Symbolic references are forbidden under the C<use strict> pragma. +They are not true references and consequently are not reference counted +or garbage collected. 
+ +The other reason why using a variable to hold the name of another +variable is a bad idea is that the question often stems from a lack of +understanding of Perl data structures, particularly hashes. By using +symbolic references, you are just using the package's symbol-table hash +(like C<%main::>) instead of a user-defined hash. The solution is to +use your own hash or a real reference instead. + + $fred = 23; + $varname = "fred"; + $USER_VARS{$varname}++; # not $$varname++ + +There we're using the %USER_VARS hash instead of symbolic references. +Sometimes this comes up in reading strings from the user with variable +references and wanting to expand them to the values of your perl +program's variables. This is also a bad idea because it conflates the +program-addressable namespace and the user-addressable one. Instead of +reading a string and expanding it to the actual contents of your program's +own variables: + + $str = 'this has a $fred and $barney in it'; + $str =~ s/(\$\w+)/$1/eeg; # need double eval + +Instead, it would be better to keep a hash around like %USER_VARS and have +variable references actually refer to entries in that hash: + + $str =~ s/\$(\w+)/$USER_VARS{$1}/g; # no /e here at all + +That's faster, cleaner, and safer than the previous approach. Of course, +you don't need to use a dollar sign. You could use your own scheme to +make it less confusing, like bracketed percent symbols, etc. + + $str = 'this has a %fred% and %barney% in it'; + $str =~ s/%(\w+)%/$USER_VARS{$1}/g; # no /e here at all + +Another reason that folks sometimes think they want a variable to contain +the name of a variable is because they don't know how to build proper +data structures using hashes. For example, let's say they wanted two +hashes in their program: %fred and %barney, and to use another scalar +variable to refer to those by name. 
+ + $name = "fred"; + $$name{WIFE} = "wilma"; # set %fred + + $name = "barney"; + $$name{WIFE} = "betty"; # set %barney + +This is still a symbolic reference, and is still saddled with the +problems enumerated above. It would be far better to write: + + $folks{"fred"}{WIFE} = "wilma"; + $folks{"barney"}{WIFE} = "betty"; + +And just use a multilevel hash to start with. + +The only times that you absolutely I<must> use symbolic references are +when you really must refer to the symbol table. This may be because it's +something that can't take a real reference to, such as a format name. +Doing so may also be important for method calls, since these always go +through the symbol table for resolution. + +In those cases, you would turn off C<strict 'refs'> temporarily so you +can play around with the symbol table. For example: + + @colors = qw(red blue green yellow orange purple violet); + for my $name (@colors) { + no strict 'refs'; # renege for the block + *$name = sub { "<FONT COLOR='$name'>@_</FONT>" }; + } + +All those functions (red(), blue(), green(), etc.) appear to be separate, +but the real code in the closure actually was compiled only once. + +So, sometimes you might want to use symbolic references to directly +manipulate the symbol table. This doesn't matter for formats, handles, and +subroutines, because they are always global -- you can't use my() on them. +But for scalars, arrays, and hashes -- and usually for subroutines -- +you probably want to use hard references only. + =head1 AUTHOR AND COPYRIGHT Copyright (c) 1997-1999 Tom Christiansen and Nathan Torkington. @@ -833,7 +953,7 @@ All rights reserved. When included as part of the Standard Version of Perl, or as part of its complete documentation whether printed or otherwise, this work -may be distributed only under the terms of Perl's Artistic Licence. +may be distributed only under the terms of Perl's Artistic License. 
Any distribution of this file or derivatives thereof I<outside> of that package require that special arrangements be made with copyright holder. @@ -843,4 +963,3 @@ are hereby placed into the public domain. You are permitted and encouraged to use this code in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq8.pod b/contrib/perl5/pod/perlfaq8.pod index 9ef41af..ed22ba0 100644 --- a/contrib/perl5/pod/perlfaq8.pod +++ b/contrib/perl5/pod/perlfaq8.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq8 - System Interaction ($Revision: 1.36 $, $Date: 1999/01/08 05:36:34 $) +perlfaq8 - System Interaction ($Revision: 1.39 $, $Date: 1999/05/23 18:37:57 $) =head1 DESCRIPTION @@ -15,8 +15,9 @@ contain more detailed information on the vagaries of your perl. =head2 How do I find out which operating system I'm running under? -The $^O variable ($OSNAME if you use English) contains the operating -system that your perl binary was built for. +The $^O variable ($OSNAME if you use English) contains an indication of +the name of the operating system (not its release number) that your perl +binary was built for. =head2 How come exec() doesn't return? @@ -74,7 +75,7 @@ Or like this: =head2 How do I read just one key without waiting for a return key? Controlling input buffering is a remarkably system-dependent matter. -If most systems, you can just use the B<stty> command as shown in +On many systems, you can just use the B<stty> command as shown in L<perlfunc/getc>, but as you see, that's already getting you into portability snags. @@ -167,7 +168,7 @@ not to block: =head2 How do I clear the screen? 
-If you only have to so infrequently, use C<system>: +If you only have to do so infrequently, use C<system>: system("clear"); @@ -409,7 +410,7 @@ For example: } However, because syscalls restart by default, you'll find that if -you're in a "slow" call, such as E<lt>FHE<gt>, read(), connect(), or +you're in a "slow" call, such as <FH>, read(), connect(), or wait(), that the only way to terminate them is by "longjumping" out; that is, by raising an exception. See the time-out handler for a blocking flock() in L<perlipc/"Signals"> or chapter 6 of the Camel. @@ -421,7 +422,7 @@ properly, the getpw*() functions described in L<perlfunc> should in theory provide (read-only) access to entries in the shadow password file. To change the file, make a new shadow password file (the format varies from system to system - see L<passwd(5)> for specifics) and use -pwd_mkdb(8) to install it (see L<pwd_mkdb(5)> for more details). +pwd_mkdb(8) to install it (see L<pwd_mkdb(8)> for more details). =head2 How do I set the time and date? @@ -461,7 +462,7 @@ something like this: $done = $start = pack($TIMEVAL_T, ()); - syscall( &SYS_gettimeofday, $start, 0) != -1 + syscall(&SYS_gettimeofday, $start, 0) != -1 or die "gettimeofday: $!"; ########################## @@ -699,7 +700,7 @@ case the fork()/exec() description still applies. Strictly speaking, nothing. Stylistically speaking, it's not a good way to write maintainable code because backticks have a (potentially -humungous) return value, and you're ignoring it. It's may also not be very +humongous) return value, and you're ignoring it. It may also not be very efficient, because you have to read in all the lines of output, allocate memory for them, and then throw it away. 
Too often people are lulled to writing: @@ -725,7 +726,7 @@ In most cases, this could and probably should be written as system("cat /etc/termcap") == 0 or die "cat program failed!"; -Which will get the output quickly (as its generated, instead of only +Which will get the output quickly (as it is generated, instead of only at the end) and also check the return value. system() also provides direct control over whether shell wildcard @@ -751,8 +752,14 @@ You have to do this: } Just as with system(), no shell escapes happen when you exec() a list. +Further examples of this can be found in L<perlipc/"Safe Pipe Opens">. -There are more examples of this L<perlipc/"Safe Pipe Opens">. +Note that if you're stuck on Microsoft, no solution to this vexing issue +is even possible. Even if Perl were to emulate fork(), you'd still +be hosed, because Microsoft gives no argc/argv-style API. Their API +always reparses from a single string, which is fundamentally wrong, +but you're not likely to get the Gods of Redmond to acknowledge this +and fix it for you. =head2 Why can't my script read from STDIN after I gave it EOF (^D on Unix, ^Z on MS-DOS)? @@ -928,7 +935,7 @@ the current process group of your controlling terminal as follows: =head2 How do I timeout a slow event? Use the alarm() function, probably in conjunction with a signal -handler, as documented L<perlipc/"Signals"> and chapter 6 of the +handler, as documented in L<perlipc/"Signals"> and chapter 6 of the Camel. You may instead use the more flexible Sys::AlarmCall module available from CPAN. @@ -945,10 +952,9 @@ in L<perlfunc/fork>. =head2 How do I use an SQL database? There are a number of excellent interfaces to SQL databases. See the -DBD::* modules available from -http://www.perl.com/CPAN/modules/dbperl/DBD . +DBD::* modules available from http://www.perl.com/CPAN/modules/DBD . A lot of information on this can be found at -http://www.hermetica.com/technologia/perl/DBI/index.html . 
+http://www.symbolstone.org/technology/perl/DBI/ =head2 How do I make a system() exit on control-C? @@ -970,12 +976,15 @@ sysopen(): sysopen(FH, "/tmp/somefile", O_WRONLY|O_NDELAY|O_CREAT, 0644) or die "can't open /tmp/somefile: $!": -=head2 How do I install a CPAN module? -The easiest way is to have the CPAN module do it for you. This module -comes with perl version 5.004 and later. To manually install the CPAN -module, or any well-behaved CPAN module for that matter, follow these -steps: + + +=head2 How do I install a module from CPAN? + +The easiest way is to have a module also named CPAN do it for you. +This module comes with perl version 5.004 and later. To manually install +the CPAN module, or any well-behaved CPAN module for that matter, follow +these steps: =over 4 @@ -1085,7 +1094,7 @@ All rights reserved. When included as part of the Standard Version of Perl, or as part of its complete documentation whether printed or otherwise, this work -may be distributed only under the terms of Perl's Artistic Licence. +may be distributed only under the terms of Perl's Artistic License. Any distribution of this file or derivatives thereof I<outside> of that package require that special arrangements be made with copyright holder. @@ -1095,4 +1104,3 @@ are hereby placed into the public domain. You are permitted and encouraged to use this code in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfaq9.pod b/contrib/perl5/pod/perlfaq9.pod index 6536064..16a803c 100644 --- a/contrib/perl5/pod/perlfaq9.pod +++ b/contrib/perl5/pod/perlfaq9.pod @@ -1,6 +1,6 @@ =head1 NAME -perlfaq9 - Networking ($Revision: 1.24 $, $Date: 1999/01/08 05:39:48 $) +perlfaq9 - Networking ($Revision: 1.26 $, $Date: 1999/05/23 16:08:30 $) =head1 DESCRIPTION @@ -76,11 +76,13 @@ stamp prepended. =head2 How do I remove HTML from a string? 
-The most correct way (albeit not the fastest) is to use HTML::Parse -from CPAN (part of the HTML-Tree package on CPAN). +The most correct way (albeit not the fastest) is to use HTML::Parser +from CPAN. Another mostly correct +way is to use HTML::FormatText which not only removes HTML but also +attempts to do a little simple formatting of the resulting plain text. Many folks attempt a simple-minded regular expression approach, like -C<s/E<lt>.*?E<gt>//g>, but that fails in many cases because the tags +C<< s/<.*?>//g >>, but that fails in many cases because the tags may continue over line breaks, they may contain quoted angle-brackets, or HTML comment may be present. Plus folks forget to convert entities, like C<<> for example. @@ -100,7 +102,7 @@ a solution: <IMG SRC = "foo.gif" ALT = "A > B"> - <IMG SRC = "foo.gif" + <IMG SRC = "foo.gif" ALT = "A > B"> <!-- <A comment> --> @@ -131,12 +133,11 @@ A quick but imperfect approach is }gsix; This version does not adjust relative URLs, understand alternate -bases, deal with HTML comments, deal with HREF and NAME attributes in -the same tag, or accept URLs themselves as arguments. It also runs -about 100x faster than a more "complete" solution using the LWP suite -of modules, such as the -http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/xurl.gz -program. +bases, deal with HTML comments, deal with HREF and NAME attributes +in the same tag, understand extra qualifiers like TARGET, or accept +URLs themselves as arguments. It also runs about 100x faster than a +more "complete" solution using the LWP suite of modules, such as the +http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/xurl.gz program. =head2 How do I download a file from the user's machine? How do I open a file on another machine? @@ -147,7 +148,7 @@ the same as the startform() method. =head2 How do I make a pop-up menu in HTML? -Use the B<E<lt>SELECTE<gt>> and B<E<lt>OPTIONE<gt>> tags. The CGI.pm +Use the B<< <SELECT> >> and B<< <OPTION> >> tags. 
The CGI.pm module (available from CPAN) supports this widget, as well as many others, including some that it cleverly synthesizes on its own. @@ -159,8 +160,9 @@ on your system, is this: $html_code = `lynx -source $url`; $text_data = `lynx -dump $url`; -The libwww-perl (LWP) modules from CPAN provide a more powerful way to -do this. They work through proxies, and don't require lynx: +The libwww-perl (LWP) modules from CPAN provide a more powerful way +to do this. They don't require lynx, but like lynx, can still work +through proxies: # simplest version use LWP::Simple; @@ -168,12 +170,12 @@ do this. They work through proxies, and don't require lynx: # or print HTML from a URL use LWP::Simple; - getprint "http://www.sn.no/libwww-perl/"; + getprint "http://www.linpro.no/lwp/"; # or print ASCII from HTML from a URL # also need HTML-Tree package from CPAN use LWP::Simple; - use HTML::Parse; + use HTML::Parser; use HTML::FormatText; my ($html, $ascii); $html = get("http://www.perl.com/"); @@ -213,11 +215,11 @@ Here's an example of decoding: $string =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge; Encoding is a bit harder, because you can't just blindly change -all the non-alphanumeric characters (C<\W>) into their hex escapes. +all the non-alphanumunder characters (C<\W>) into their hex escapes. It's important that characters with special meaning like C</> and C<?> I<not> be translated. Probably the easiest way to get this right is to avoid reinventing the wheel and just use the URI::Escape module, -which is part of the libwww-perl package (LWP) available from CPAN. +available from CPAN. =head2 How do I redirect to another page? @@ -236,9 +238,21 @@ because of "optimizations" that servers do. print "Location: $url\n\n"; exit; -To be correct to the spec, each of those C<"\n"> -should really each be C<"\015\012">, but unless you're -stuck on MacOS, you probably won't notice. +To target a particular frame in a frameset, include the "Window-target:" +in the header. 
+ + print <<EOF; + Location: http://www.domain.com/newpage + Window-target: <FrameName> + + EOF + +To be correct to the spec, each of those virtual newlines should really be +physical C<"\015\012"> sequences by the time you hit the client browser. +Except for NPH scripts, though, that local newline should get translated +by your server into standard form, so you shouldn't have a problem +here, even if you are stuck on MacOS. Everybody else probably won't +even notice. =head2 How do I put a password on my web pages? @@ -329,7 +343,7 @@ RFC-822 (the mail header standard) compliant, and addresses that aren't deliverable which are compliant. Many are tempted to try to eliminate many frequently-invalid -mail addresses with a simple regexp, such as +mail addresses with a simple regex, such as C</^[\w.-]+\@([\w.-]\.)+\w+$/>. It's a very bad idea. However, this also throws out many valid ones, and says nothing about potential deliverability, so is not suggested. Instead, see @@ -382,12 +396,12 @@ format after minor transliterations: =head2 How do I return the user's mail address? -On systems that support getpwuid, the $E<lt> variable and the +On systems that support getpwuid, the $< variable and the Sys::Hostname module (which is part of the standard perl distribution), you can probably try using something like this: use Sys::Hostname; - $address = sprintf('%s@%s', getpwuid($<), hostname); + $address = sprintf('%s@%s', scalar getpwuid($<), hostname); Company policies on mail address can mean that this generates addresses that the company's mail system will not accept, so you should ask for @@ -423,7 +437,12 @@ the message into the queue. This last option means your message won't be immediately delivered, so leave it out if you want immediate delivery. 
-Or use the CPAN module Mail::Mailer: +Alternate, less convenient approaches include calling mail (sometimes +called mailx) directly or simply opening up port 25 and having an +intimate conversation between just you and the remote SMTP daemon, +probably sendmail. + +Or you might be able to use the CPAN module Mail::Mailer: use Mail::Mailer; @@ -438,34 +457,17 @@ Or use the CPAN module Mail::Mailer: The Mail::Internet module uses Net::SMTP which is less Unix-centric than Mail::Mailer, but less reliable. Avoid raw SMTP commands. There -are many reasons to use a mail transport agent like sendmail. These +are many reasons to use a mail transport agent like sendmail. These include queueing, MX records, and security. =head2 How do I read mail? -Use the Mail::Folder module from CPAN (part of the MailFolder package) or -the Mail::Internet module from CPAN (also part of the MailTools package). - - # sending mail - use Mail::Internet; - use Mail::Header; - # say which mail host to use - $ENV{SMTPHOSTS} = 'mail.frii.com'; - # create headers - $header = new Mail::Header; - $header->add('From', 'gnat@frii.com'); - $header->add('Subject', 'Testing'); - $header->add('To', 'gnat@frii.com'); - # create body - $body = 'This is a test, ignore'; - # create mail object - $mail = new Mail::Internet(undef, Header => $header, Body => \[$body]); - # send it - $mail->smtpsend or die; - -Often a module is overkill, though. Here's a mail sorter. - - #!/usr/bin/perl +While you could use the Mail::Folder module from CPAN (part of the +MailFolder package) or the Mail::Internet module from CPAN (also part +of the MailTools package), often a module is overkill, though. Here's a +mail sorter. + + #!/usr/bin/perl # bysub1 - simple sort by subject my(@msgs, @sub); my $msgno = -1; @@ -476,12 +478,12 @@ Often a module is overkill, though. Here's a mail sorter. $sub[++$msgno] = lc($1) || ''; } $msgs[$msgno] .= $_; - } + } for my $i (sort { $sub[$a] cmp $sub[$b] || $a <=> $b } (0 ..
$#msgs)) { print $msgs[$i]; } -Or more succinctly, +Or more succinctly, #!/usr/bin/perl -n00 # bysub2 - awkish sort-by-subject @@ -541,7 +543,7 @@ All rights reserved. When included as part of the Standard Version of Perl, or as part of its complete documentation whether printed or otherwise, this work -may be distributed only under the terms of Perl's Artistic Licence. +may be distributed only under the terms of Perl's Artistic License. Any distribution of this file or derivatives thereof I<outside> of that package require that special arrangements be made with copyright holder. @@ -551,4 +553,3 @@ are hereby placed into the public domain. You are permitted and encouraged to use this code in your own programs for fun or for profit as you see fit. A simple comment in the code giving credit would be courteous but is not required. - diff --git a/contrib/perl5/pod/perlfilter.pod b/contrib/perl5/pod/perlfilter.pod new file mode 100644 index 0000000..c3c8315 --- /dev/null +++ b/contrib/perl5/pod/perlfilter.pod @@ -0,0 +1,570 @@ +=head1 NAME + +perlfilter - Source Filters + + +=head1 DESCRIPTION + +This article is about a little-known feature of Perl called +I<source filters>. Source filters alter the program text of a module +before Perl sees it, much as a C preprocessor alters the source text of +a C program before the compiler sees it. This article tells you more +about what source filters are, how they work, and how to write your +own. + +The original purpose of source filters was to let you encrypt your +program source to prevent casual piracy. This isn't all they can do, as +you'll soon learn. But first, the basics. + +=head1 CONCEPTS + +Before the Perl interpreter can execute a Perl script, it must first +read it from a file into memory for parsing and compilation. If that +script itself includes other scripts with a C<use> or C<require> +statement, then each of those scripts will have to be read from their +respective files as well. 
+ +Now think of each logical connection between the Perl parser and an +individual file as a I<source stream>. A source stream is created when +the Perl parser opens a file, it continues to exist as the source code +is read into memory, and it is destroyed when Perl is finished parsing +the file. If the parser encounters a C<require> or C<use> statement in +a source stream, a new and distinct stream is created just for that +file. + +The diagram below represents a single source stream, with the flow of +source from a Perl script file on the left into the Perl parser on the +right. This is how Perl normally operates. + + file -------> parser + +There are two important points to remember: + +=over 5 + +=item 1. + +Although there can be any number of source streams in existence at any +given time, only one will be active. + +=item 2. + +Every source stream is associated with only one file. + +=back + +A source filter is a special kind of Perl module that intercepts and +modifies a source stream before it reaches the parser. A source filter +changes our diagram like this: + + file ----> filter ----> parser + +If that doesn't make much sense, consider the analogy of a command +pipeline. Say you have a shell script stored in the compressed file +I<trial.gz>. The simple pipeline command below runs the script without +needing to create a temporary file to hold the uncompressed file. + + gunzip -c trial.gz | sh + +In this case, the data flow from the pipeline can be represented as follows: + + trial.gz ----> gunzip ----> sh + +With source filters, you can store the text of your script compressed and use a source filter to uncompress it for Perl's parser: + + compressed gunzip + Perl program ---> source filter ---> parser + +=head1 USING FILTERS + +So how do you use a source filter in a Perl script? Above, I said that +a source filter is just a special kind of module. Like all Perl +modules, a source filter is invoked with a use statement. 
+ +Say you want to pass your Perl source through the C preprocessor before +execution. You could use the existing C<-P> command line option to do +this, but as it happens, the source filters distribution comes with a C +preprocessor filter module called Filter::cpp. Let's use that instead. + +Below is an example program, C<cpp_test>, which makes use of this filter. +Line numbers have been added to allow specific lines to be referenced +easily. + + 1: use Filter::cpp ; + 2: #define TRUE 1 + 3: $a = TRUE ; + 4: print "a = $a\n" ; + +When you execute this script, Perl creates a source stream for the +file. Before the parser processes any of the lines from the file, the +source stream looks like this: + + cpp_test ---------> parser + +Line 1, C<use Filter::cpp>, includes and installs the C<cpp> filter +module. All source filters work this way. The use statement is compiled +and executed at compile time, before any more of the file is read, and +it attaches the cpp filter to the source stream behind the scenes. Now +the data flow looks like this: + + cpp_test ----> cpp filter ----> parser + +As the parser reads the second and subsequent lines from the source +stream, it feeds those lines through the C<cpp> source filter before +processing them. The C<cpp> filter simply passes each line through the +real C preprocessor. The output from the C preprocessor is then +inserted back into the source stream by the filter. + + .-> cpp --. + | | + | | + | <-' + cpp_test ----> cpp filter ----> parser + +The parser then sees the following code: + + use Filter::cpp ; + $a = 1 ; + print "a = $a\n" ; + +Let's consider what happens when the filtered code includes another +module with use: + + 1: use Filter::cpp ; + 2: #define TRUE 1 + 3: use Fred ; + 4: $a = TRUE ; + 5: print "a = $a\n" ; + +The C<cpp> filter does not apply to the text of the Fred module, only +to the text of the file that used it (C<cpp_test>). 
Although the use +statement on line 3 will pass through the cpp filter, the module that +gets included (C<Fred>) will not. The source streams look like this +after line 3 has been parsed and before line 4 is parsed: + + cpp_test ---> cpp filter ---> parser (INACTIVE) + + Fred.pm ----> parser + +As you can see, a new stream has been created for reading the source +from C<Fred.pm>. This stream will remain active until all of C<Fred.pm> +has been parsed. The source stream for C<cpp_test> will still exist, +but is inactive. Once the parser has finished reading Fred.pm, the +source stream associated with it will be destroyed. The source stream +for C<cpp_test> then becomes active again and the parser reads line 4 +and subsequent lines from C<cpp_test>. + +You can use more than one source filter on a single file. Similarly, +you can reuse the same filter in as many files as you like. + +For example, if you have a uuencoded and compressed source file, it is +possible to stack a uudecode filter and an uncompression filter like +this: + + use Filter::uudecode ; use Filter::uncompress ; + M'XL(".H<US4''V9I;F%L')Q;>7/;1I;_>_I3=&E=%:F*I"T?22Q/ + M6]9*<IQCO*XFT"0[PL%%'Y+IG?WN^ZYN-$'J.[.JE$,20/?K=_[> + ... + +Once the first line has been processed, the flow will look like this: + + file ---> uudecode ---> uncompress ---> parser + filter filter + +Data flows through filters in the same order they appear in the source +file. The uudecode filter appeared before the uncompress filter, so the +source file will be uudecoded before it's uncompressed. + +=head1 WRITING A SOURCE FILTER + +There are three ways to write your own source filter. You can write it +in C, use an external program as a filter, or write the filter in Perl. +I won't cover the first two in any great detail, so I'll get them out +of the way first. Writing the filter in Perl is most convenient, so +I'll devote the most space to it. 
+ +=head1 WRITING A SOURCE FILTER IN C + +The first of the three available techniques is to write the filter +completely in C. The external module you create interfaces directly +with the source filter hooks provided by Perl. + +The advantage of this technique is that you have complete control over +the implementation of your filter. The big disadvantage is the +increased complexity required to write the filter - not only do you +need to understand the source filter hooks, but you also need a +reasonable knowledge of Perl guts. One of the few times it is worth +going to this trouble is when writing a source scrambler. The +C<decrypt> filter (which unscrambles the source before Perl parses it) +included with the source filter distribution is an example of a C +source filter (see Decryption Filters, below). + + +=over 5 + +=item B<Decryption Filters> + +All decryption filters work on the principle of "security through +obscurity." Regardless of how well you write a decryption filter and +how strong your encryption algorithm, anyone determined enough can +retrieve the original source code. The reason is quite simple - once +the decryption filter has decrypted the source back to its original +form, fragments of it will be stored in the computer's memory as Perl +parses it. The source might only be in memory for a short period of +time, but anyone possessing a debugger, skill, and lots of patience can +eventually reconstruct your program. + +That said, there are a number of steps that can be taken to make life +difficult for the potential cracker. The most important: Write your +decryption filter in C and statically link the decryption module into +the Perl binary. For further tips to make life difficult for the +potential cracker, see the file I<decrypt.pm> in the source filters +module. + +=back + +=head1 CREATING A SOURCE FILTER AS A SEPARATE EXECUTABLE + +An alternative to writing the filter in C is to create a separate +executable in the language of your choice. 
The separate executable +reads from standard input, does whatever processing is necessary, and +writes the filtered data to standard output. C<Filter::cpp> is an +example of a source filter implemented as a separate executable - the +executable is the C preprocessor bundled with your C compiler. + +The source filter distribution includes two modules that simplify this +task: C<Filter::exec> and C<Filter::sh>. Both allow you to run any +external executable. Both use a coprocess to control the flow of data +into and out of the external executable. (For details on coprocesses, +see Stevens, W.R. "Advanced Programming in the UNIX Environment." +Addison-Wesley, ISBN 0-201-56317-7, pages 441-445.) The difference +between them is that C<Filter::exec> spawns the external command +directly, while C<Filter::sh> spawns a shell to execute the external +command. (Unix uses the Bourne shell; NT uses the cmd shell.) Spawning +a shell allows you to make use of the shell metacharacters and +redirection facilities. + +Here is an example script that uses C<Filter::sh>: + + use Filter::sh 'tr XYZ PQR' ; + $a = 1 ; + print "XYZ a = $a\n" ; + +The output you'll get when the script is executed: + + PQR a = 1 + +Writing a source filter as a separate executable works fine, but a +small performance penalty is incurred. For example, if you execute the +small example above, a separate subprocess will be created to run the +Unix C<tr> command. Each use of the filter requires its own subprocess. +If creating subprocesses is expensive on your system, you might want to +consider one of the other options for creating source filters. + +=head1 WRITING A SOURCE FILTER IN PERL + +The easiest and most portable option available for creating your own +source filter is to write it completely in Perl. To distinguish this +from the previous two techniques, I'll call it a Perl source filter. + +To help understand how to write a Perl source filter we need an example +to study.
Here is a complete source filter that performs rot13 +decoding. (Rot13 is a very simple encryption scheme used in Usenet +postings to hide the contents of offensive posts. It moves every letter +forward thirteen places, so that A becomes N, B becomes O, and Z +becomes M.) + + + package Rot13 ; + + use Filter::Util::Call ; + + sub import { + my ($type) = @_ ; + my ($ref) = [] ; + filter_add(bless $ref) ; + } + + sub filter { + my ($self) = @_ ; + my ($status) ; + + tr/n-za-mN-ZA-M/a-zA-Z/ + if ($status = filter_read()) > 0 ; + $status ; + } + + 1; + +All Perl source filters are implemented as Perl classes and have the +same basic structure as the example above. + +First, we include the C<Filter::Util::Call> module, which exports a +number of functions into your filter's namespace. The filter shown +above uses two of these functions, C<filter_add()> and +C<filter_read()>. + +Next, we create the filter object and associate it with the source +stream by defining the C<import> function. If you know Perl well +enough, you know that C<import> is called automatically every time a +module is included with a use statement. This makes C<import> the ideal +place to both create and install a filter object. + +In the example filter, the object (C<$ref>) is blessed just like any +other Perl object. Our example uses an anonymous array, but this isn't +a requirement. Because this example doesn't need to store any context +information, we could have used a scalar or hash reference just as +well. The next section demonstrates context data. + +The association between the filter object and the source stream is made +with the C<filter_add()> function. This takes a filter object as a +parameter (C<$ref> in this case) and installs it in the source stream. + +Finally, there is the code that actually does the filtering. For this +type of Perl source filter, all the filtering is done in a method +called C<filter()>. (It is also possible to write a Perl source filter +using a closure. 
See the C<Filter::Util::Call> manual page for more +details.) It's called every time the Perl parser needs another line of +source to process. The C<filter()> method, in turn, reads lines from +the source stream using the C<filter_read()> function. + +If a line was available from the source stream, C<filter_read()> +returns a status value greater than zero and appends the line to C<$_>. +A status value of zero indicates end-of-file, less than zero means an +error. The filter function itself is expected to return its status in +the same way, and put the filtered line it wants written to the source +stream in C<$_>. The use of C<$_> accounts for the brevity of most Perl +source filters. + +In order to make use of the rot13 filter we need some way of encoding +the source file in rot13 format. The script below, C<mkrot13>, does +just that. + + die "usage mkrot13 filename\n" unless @ARGV ; + my $in = $ARGV[0] ; + my $out = "$in.tmp" ; + open(IN, "<$in") or die "Cannot open file $in: $!\n"; + open(OUT, ">$out") or die "Cannot open file $out: $!\n"; + + print OUT "use Rot13;\n" ; + while (<IN>) { + tr/a-zA-Z/n-za-mN-ZA-M/ ; + print OUT ; + } + + close IN; + close OUT; + unlink $in; + rename $out, $in; + +If we encrypt this with C<mkrot13>: + + print " hello fred \n" ; + +the result will be this: + + use Rot13; + cevag "uryyb serq\a" ; + +Running it produces this output: + + hello fred + +=head1 USING CONTEXT: THE DEBUG FILTER + +The rot13 example was a trivial example. Here's another demonstration +that shows off a few more features. + +Say you wanted to include a lot of debugging code in your Perl script +during development, but you didn't want it available in the released +product. Source filters offer a solution. In order to keep the example +simple, let's say you wanted the debugging output to be controlled by +an environment variable, C<DEBUG>. Debugging code is enabled if the +variable exists, otherwise it is disabled. 
+ +Two special marker lines will bracket debugging code, like this: + + ## DEBUG_BEGIN + if ($year > 1999) { + warn "Debug: millennium bug in year $year\n" ; + } + ## DEBUG_END + +When the C<DEBUG> environment variable exists, the filter ensures that +Perl parses only the code between the C<DEBUG_BEGIN> and C<DEBUG_END> +markers. That means that when C<DEBUG> does exist, the code above +should be passed through the filter unchanged. The marker lines can +also be passed through as-is, because the Perl parser will see them as +comment lines. When C<DEBUG> isn't set, we need a way to disable the +debug code. A simple way to achieve that is to convert the lines +between the two markers into comments: + + ## DEBUG_BEGIN + #if ($year > 1999) { + # warn "Debug: millennium bug in year $year\n" ; + #} + ## DEBUG_END + +Here is the complete Debug filter: + + package Debug; + + use strict; + use warnings; + use Filter::Util::Call ; + + use constant TRUE => 1 ; + use constant FALSE => 0 ; + + sub import { + my ($type) = @_ ; + my (%context) = ( + Enabled => defined $ENV{DEBUG}, + InTraceBlock => FALSE, + Filename => (caller)[1], + LineNo => 0, + LastBegin => 0, + ) ; + filter_add(bless \%context) ; + } + + sub Die { + my ($self) = shift ; + my ($message) = shift ; + my ($line_no) = shift || $self->{LastBegin} ; + die "$message at $self->{Filename} line $line_no.\n" + } + + sub filter { + my ($self) = @_ ; + my ($status) ; + $status = filter_read() ; + ++ $self->{LineNo} ; + + # deal with EOF/error first + if ($status <= 0) { + $self->Die("DEBUG_BEGIN has no DEBUG_END") + if $self->{InTraceBlock} ; + return $status ; + } + + if ($self->{InTraceBlock}) { + if (/^\s*##\s*DEBUG_BEGIN/ ) { + $self->Die("Nested DEBUG_BEGIN", $self->{LineNo}) + } elsif (/^\s*##\s*DEBUG_END/) { + $self->{InTraceBlock} = FALSE ; + } + + # comment out the debug lines when the filter is disabled + s/^/#/ if ! 
$self->{Enabled} ; + } elsif ( /^\s*##\s*DEBUG_BEGIN/ ) { + $self->{InTraceBlock} = TRUE ; + $self->{LastBegin} = $self->{LineNo} ; + } elsif ( /^\s*##\s*DEBUG_END/ ) { + $self->Die("DEBUG_END has no DEBUG_BEGIN", $self->{LineNo}); + } + return $status ; + } + + 1 ; + +The big difference between this filter and the previous example is the +use of context data in the filter object. The filter object is based on +a hash reference, and is used to keep various pieces of context +information between calls to the filter function. All but two of the +hash fields are used for error reporting. The first of those two, +Enabled, is used by the filter to determine whether the debugging code +should be given to the Perl parser. The second, InTraceBlock, is true +when the filter has encountered a C<DEBUG_BEGIN> line, but has not yet +encountered the following C<DEBUG_END> line. + +If you ignore all the error checking that most of the code does, the +essence of the filter is as follows: + + sub filter { + my ($self) = @_ ; + my ($status) ; + $status = filter_read() ; + + # deal with EOF/error first + return $status if $status <= 0 ; + if ($self->{InTraceBlock}) { + if (/^\s*##\s*DEBUG_END/) { + $self->{InTraceBlock} = FALSE + } + + # comment out debug lines when the filter is disabled + s/^/#/ if ! $self->{Enabled} ; + } elsif ( /^\s*##\s*DEBUG_BEGIN/ ) { + $self->{InTraceBlock} = TRUE ; + } + return $status ; + } + +Be warned: just as the C-preprocessor doesn't know C, the Debug filter +doesn't know Perl. It can be fooled quite easily: + + print <<EOM; + ##DEBUG_BEGIN + EOM + +Such things aside, you can see that a lot can be achieved with a modest +amount of code. + +=head1 CONCLUSION + +You now have a better understanding of what a source filter is, and you +might even have a possible use for them. If you feel like playing with +source filters but need a bit of inspiration, here are some extra +features you could add to the Debug filter. + +First, an easy one.
Rather than having debugging code that is +all-or-nothing, it would be much more useful to be able to control +which specific blocks of debugging code get included. Try extending the +syntax for debug blocks to allow each to be identified. The contents of +the C<DEBUG> environment variable can then be used to control which +blocks get included. + +Once you can identify individual blocks, try allowing them to be +nested. That isn't difficult either. + +Here is an interesting idea that doesn't involve the Debug filter. +Currently Perl subroutines have fairly limited support for formal +parameter lists. You can specify the number of parameters and their +type, but you still have to manually take them out of the C<@_> array +yourself. Write a source filter that allows you to have a named +parameter list. Such a filter would turn this: + + sub MySub ($first, $second, @rest) { ... } + +into this: + + sub MySub($$@) { + my ($first) = shift ; + my ($second) = shift ; + my (@rest) = @_ ; + ... + } + +Finally, if you feel like a real challenge, have a go at writing a +full-blown Perl macro preprocessor as a source filter. Borrow the +useful features from the C preprocessor and any other macro processors +you know. The tricky bit will be choosing how much knowledge of Perl's +syntax you want your filter to have. + +=head1 REQUIREMENTS + +The Source Filters distribution is available on CPAN, in + + CPAN/modules/by-module/Filter + +=head1 AUTHOR + +Paul Marquess E<lt>Paul.Marquess@btinternet.comE<gt> + +=head1 Copyrights + +This article originally appeared in The Perl Journal #11, and is +copyright 1998 The Perl Journal. It appears courtesy of Jon Orwant and +The Perl Journal. This document may be distributed under the same terms +as Perl itself.
diff --git a/contrib/perl5/pod/perlfork.pod b/contrib/perl5/pod/perlfork.pod new file mode 100644 index 0000000..d930e93 --- /dev/null +++ b/contrib/perl5/pod/perlfork.pod @@ -0,0 +1,301 @@ +=head1 NAME + +perlfork - Perl's fork() emulation + +=head1 SYNOPSIS + +Perl provides a fork() keyword that corresponds to the Unix system call +of the same name. On most Unix-like platforms where the fork() system +call is available, Perl's fork() simply calls it. + +On some platforms such as Windows where the fork() system call is not +available, Perl can be built to emulate fork() at the interpreter level. +While the emulation is designed to be as compatible as possible with the +real fork() at the level of the Perl program, there are certain +important differences that stem from the fact that all the pseudo child +"processes" created this way live in the same real process as far as the +operating system is concerned. + +This document provides a general overview of the capabilities and +limitations of the fork() emulation. Note that the issues discussed here +are not applicable to platforms where a real fork() is available and Perl +has been configured to use it. + +=head1 DESCRIPTION + +The fork() emulation is implemented at the level of the Perl interpreter. +What this means in general is that running fork() will actually clone the +running interpreter and all its state, and run the cloned interpreter in +a separate thread, beginning execution in the new thread just after the +point where the fork() was called in the parent. We will refer to the +thread that implements this child "process" as the pseudo-process. + +To the Perl program that called fork(), all this is designed to be +transparent. The parent returns from the fork() with a pseudo-process +ID that can be subsequently used in any process manipulation functions; +the child returns from the fork() with a value of C<0> to signify that +it is the child pseudo-process.
+ +=head2 Behavior of other Perl features in forked pseudo-processes + +Most Perl features behave in a natural way within pseudo-processes. + +=over 8 + +=item $$ or $PROCESS_ID + +This special variable is correctly set to the pseudo-process ID. +It can be used to identify pseudo-processes within a particular +session. Note that this value is subject to recycling if any +pseudo-processes are launched after others have been wait()-ed on. + +=item %ENV + +Each pseudo-process maintains its own virtual environment. Modifications +to %ENV affect the virtual environment, and are only visible within that +pseudo-process, and in any processes (or pseudo-processes) launched from +it. + +=item chdir() and all other builtins that accept filenames + +Each pseudo-process maintains its own virtual idea of the current directory. +Modifications to the current directory using chdir() are only visible within +that pseudo-process, and in any processes (or pseudo-processes) launched from +it. All file and directory accesses from the pseudo-process will correctly +map the virtual working directory to the real working directory appropriately. + +=item wait() and waitpid() + +wait() and waitpid() can be passed a pseudo-process ID returned by fork(). +These calls will properly wait for the termination of the pseudo-process +and return its status. + +=item kill() + +kill() can be used to terminate a pseudo-process by passing it the ID returned +by fork(). This should not be used except under dire circumstances, because +the operating system may not guarantee integrity of the process resources +when a running thread is terminated. Note that using kill() on a +pseudo-process may typically cause memory leaks, because the thread that +implements the pseudo-process does not get a chance to clean up its resources.
+ +=item exec() + +Calling exec() within a pseudo-process actually spawns the requested +executable in a separate process and waits for it to complete before +exiting with the same exit status as that process. This means that the +process ID reported within the running executable will be different from +what the earlier Perl fork() might have returned. Similarly, any process +manipulation functions applied to the ID returned by fork() will affect the +waiting pseudo-process that called exec(), not the real process it is +waiting for after the exec(). + +=item exit() + +exit() always exits just the executing pseudo-process, after automatically +wait()-ing for any outstanding child pseudo-processes. Note that this means +that the process as a whole will not exit unless all running pseudo-processes +have exited. + +=item Open handles to files, directories and network sockets + +All open handles are dup()-ed in pseudo-processes, so that closing +any handles in one process does not affect the others. See below for +some limitations. + +=back + +=head2 Resource limits + +In the eyes of the operating system, pseudo-processes created via the fork() +emulation are simply threads in the same process. This means that any +process-level limits imposed by the operating system apply to all +pseudo-processes taken together. This includes any limits imposed by the +operating system on the number of open file, directory and socket handles, +limits on disk space usage, limits on memory size, limits on CPU utilization +etc. + +=head2 Killing the parent process + +If the parent process is killed (either using Perl's kill() builtin, or +using some external means) all the pseudo-processes are killed as well, +and the whole process exits. + +=head2 Lifetime of the parent process and pseudo-processes + +During the normal course of events, the parent process and every +pseudo-process started by it will wait for their respective pseudo-children +to complete before they exit. 
This means that the parent and every +pseudo-child created by it that is also a pseudo-parent will only exit +after their pseudo-children have exited. + +A way to mark pseudo-processes as running detached from their parent (so +that the parent would not have to wait() for them if it doesn't want to) +will be provided in future. + +=head2 CAVEATS AND LIMITATIONS + +=over 8 + +=item BEGIN blocks + +The fork() emulation will not work entirely correctly when called from +within a BEGIN block. The forked copy will run the contents of the +BEGIN block, but will not continue parsing the source stream after the +BEGIN block. For example, consider the following code: + + BEGIN { + fork and exit; # fork child and exit the parent + print "inner\n"; + } + print "outer\n"; + +This will print: + + inner + +rather than the expected: + + inner + outer + +This limitation arises from fundamental technical difficulties in +cloning and restarting the stacks used by the Perl parser in the +middle of a parse. + +=item Open filehandles + +Any filehandles open at the time of the fork() will be dup()-ed. Thus, +the files can be closed independently in the parent and child, but beware +that the dup()-ed handles will still share the same seek pointer. Changing +the seek position in the parent will change it in the child and vice-versa. +One can avoid this by opening files that need distinct seek pointers +separately in the child. + +=item Forking pipe open() not yet implemented + +The C<open(FOO, "|-")> and C<open(BAR, "-|")> constructs are not yet +implemented. This limitation can be easily worked around in new code +by creating a pipe explicitly. The following example shows how to +write to a forked child: + + # simulate open(FOO, "|-") + sub pipe_to_fork ($) { + my $parent = shift; + pipe my $child, $parent or die; + my $pid = fork(); + die "fork() failed: $!" unless defined $pid; + if ($pid) { + close $child; + } + else { + close $parent; + open(STDIN, "<&=" .
fileno($child)) or die; + } + $pid; + } + + if (pipe_to_fork('FOO')) { + # parent + print FOO "pipe_to_fork\n"; + close FOO; + } + else { + # child + while (<STDIN>) { print; } + close STDIN; + exit(0); + } + +And this one reads from the child: + + # simulate open(FOO, "-|") + sub pipe_from_fork ($) { + my $parent = shift; + pipe $parent, my $child or die; + my $pid = fork(); + die "fork() failed: $!" unless defined $pid; + if ($pid) { + close $child; + } + else { + close $parent; + open(STDOUT, ">&=" . fileno($child)) or die; + } + $pid; + } + + if (pipe_from_fork('BAR')) { + # parent + while (<BAR>) { print; } + close BAR; + } + else { + # child + print "pipe_from_fork\n"; + close STDOUT; + exit(0); + } + +Forking pipe open() constructs will be supported in future. + +=item Global state maintained by XSUBs + +External subroutines (XSUBs) that maintain their own global state may +not work correctly. Such XSUBs will either need to maintain locks to +protect simultaneous access to global data from different pseudo-processes, +or maintain all their state on the Perl symbol table, which is copied +naturally when fork() is called. A callback mechanism that provides +extensions an opportunity to clone their state will be provided in the +near future. + +=item Interpreter embedded in larger application + +The fork() emulation may not behave as expected when it is executed in an +application which embeds a Perl interpreter and calls Perl APIs that can +evaluate bits of Perl code. This stems from the fact that the emulation +only has knowledge about the Perl interpreter's own data structures and +knows nothing about the containing application's state. For example, any +state carried on the application's own call stack is out of reach. + +=item Thread-safety of extensions + +Since the fork() emulation runs code in multiple threads, extensions +calling into non-thread-safe libraries may not work reliably when +calling fork(). 
As Perl's threading support gradually becomes more +widely adopted even on platforms with a native fork(), such extensions +are expected to be fixed for thread-safety. + +=back + +=head1 BUGS + +=over 8 + +=item * + +Having pseudo-process IDs be negative integers breaks down for the integer +C<-1> because the wait() and waitpid() functions treat this number as +being special. The tacit assumption in the current implementation is that +the system never allocates a thread ID of C<1> for user threads. A better +representation for pseudo-process IDs will be implemented in future. + +=item * + +This document may be incomplete in some respects. + +=back + +=head1 AUTHOR + +Support for concurrent interpreters and the fork() emulation was implemented +by ActiveState, with funding from Microsoft Corporation. + +This document is authored and maintained by Gurusamy Sarathy +E<lt>gsar@activestate.comE<gt>. + +=head1 SEE ALSO + +L<perlfunc/"fork">, L<perlipc> + +=cut diff --git a/contrib/perl5/pod/perlfunc.pod b/contrib/perl5/pod/perlfunc.pod index 5fb7863..5396fd1 100644 --- a/contrib/perl5/pod/perlfunc.pod +++ b/contrib/perl5/pod/perlfunc.pod @@ -30,7 +30,7 @@ Elements of the LIST should be separated by commas. Any function in the list below may be used either with or without parentheses around its arguments. (The syntax descriptions omit the parentheses.) If you use the parentheses, the simple (but occasionally -surprising) rule is this: It I<LOOKS> like a function, therefore it I<IS> a +surprising) rule is this: It I<looks> like a function, therefore it I<is> a function, and precedence doesn't matter. Otherwise it's a list operator or unary operator, and precedence does matter. And whitespace between the function and left parenthesis doesn't count--so you need to @@ -80,8 +80,8 @@ In general, functions in Perl that serve as wrappers for system calls of the same name (like chown(2), fork(2), closedir(2), etc.) 
all return true when they succeed and C<undef> otherwise, as is usually mentioned in the descriptions below. This is different from the C interfaces, -which return C<-1> on failure. Exceptions to this rule are C<wait()>, -C<waitpid()>, and C<syscall()>. System calls also set the special C<$!> +which return C<-1> on failure. Exceptions to this rule are C<wait>, +C<waitpid>, and C<syscall>. System calls also set the special C<$!> variable on failure. Other functions do not, except accidentally. =head2 Perl Functions by Category @@ -255,7 +255,7 @@ A file test, where X is one of the letters listed below. This unary operator takes one argument, either a filename or a filehandle, and tests the associated file to see if something is true about it. If the argument is omitted, tests C<$_>, except for C<-t>, which tests STDIN. -Unless otherwise documented, it returns C<1> for TRUE and C<''> for FALSE, or +Unless otherwise documented, it returns C<1> for true and C<''> for false, or the undefined value if the file doesn't exist. Despite the funny names, precedence is the same as any other named unary operator, and the argument may be parenthesized like any other unary operator. The @@ -290,8 +290,8 @@ X<-S>X<-b>X<-c>X<-t>X<-u>X<-g>X<-k>X<-T>X<-B>X<-M>X<-A>X<-C> -g File has setgid bit set. -k File has sticky bit set. - -T File is a text file. - -B File is a binary file (opposite of -T). + -T File is an ASCII text file. + -B File is a "binary" file (opposite of -T). -M Age of file in days when script started. -A Same for access time. @@ -319,22 +319,32 @@ if any execute bit is set in the mode. Scripts run by the superuser may thus need to do a stat() to determine the actual mode of the file, or temporarily set their effective uid to something else. +If you are using ACLs, there is a pragma called C<filetest> that may +produce more accurate results than the bare stat() mode bits. 
+When under the C<use filetest 'access'> the above-mentioned filetests +will test whether the permission can (not) be granted using the +access() family of system calls. Also note that the C<-x> and C<-X> may +under this pragma return true even if there are no execute permission +bits set (nor any extra execute permission ACLs). This strangeness is +due to the underlying system calls' definitions. Read the +documentation for the C<filetest> pragma for more information. + Note that C<-s/a/b/> does not do a negated substitution. Saying C<-exp($foo)> still works as expected, however--only single letters following a minus are interpreted as file tests. The C<-T> and C<-B> switches work as follows. The first block or so of the file is examined for odd characters such as strange control codes or -characters with the high bit set. If too many strange characters (E<gt>30%) +characters with the high bit set. If too many strange characters (>30%) are found, it's a C<-B> file, otherwise it's a C<-T> file. Also, any file containing null in the first block is considered a binary file. If C<-T> or C<-B> is used on a filehandle, the current stdio buffer is examined -rather than the first block. Both C<-T> and C<-B> return TRUE on a null +rather than the first block. Both C<-T> and C<-B> return true on a null file, or a file at EOF when testing a filehandle. Because you have to read a file to do the C<-T> test, on most occasions you want to use a C<-f> against the file first, as in C<next unless -f $file && -T $file>. -If any of the file tests (or either the C<stat()> or C<lstat()> operators) are given +If any of the file tests (or either the C<stat> or C<lstat> operators) are given the special filehandle consisting of a solitary underline, then the stat structure of the previous file test (or stat operator) is used, saving a system call. (This doesn't work with C<-t>, and you need to remember @@ -363,9 +373,13 @@ If VALUE is omitted, uses C<$_>. 
=item accept NEWSOCKET,GENERICSOCKET Accepts an incoming socket connect, just as the accept(2) system call -does. Returns the packed address if it succeeded, FALSE otherwise. +does. Returns the packed address if it succeeded, false otherwise. See the example in L<perlipc/"Sockets: Client/Server Communication">. +On systems that support a close-on-exec flag on files, the flag will +be set for the newly opened file descriptor, as determined by the +value of $^F. See L<perlvar/$^F>. + =item alarm SECONDS =item alarm @@ -381,18 +395,18 @@ starting a new one. The returned value is the amount of time remaining on the previous timer. For delays of finer granularity than one second, you may use Perl's -four-arugment version of select() leaving the first three arguments -undefined, or you might be able to use the C<syscall()> interface to +four-argument version of select() leaving the first three arguments +undefined, or you might be able to use the C<syscall> interface to access setitimer(2) if your system supports it. The Time::HiRes module from CPAN may also prove useful. -It is usually a mistake to intermix C<alarm()> -and C<sleep()> calls. +It is usually a mistake to intermix C<alarm> and C<sleep> calls. +(C<sleep> may be internally implemented in your system with C<alarm>) -If you want to use C<alarm()> to time out a system call you need to use an -C<eval()>/C<die()> pair. You can't rely on the alarm causing the system call to +If you want to use C<alarm> to time out a system call you need to use an +C<eval>/C<die> pair. You can't rely on the alarm causing the system call to fail with C<$!> set to C<EINTR> because Perl sets up signal handlers to -restart system calls on some systems. Using C<eval()>/C<die()> always works, +restart system calls on some systems. Using C<eval>/C<die> always works, modulo the caveats given in L<perlipc/"Signals">. eval { @@ -413,7 +427,7 @@ modulo the caveats given in L<perlipc/"Signals">. 
Returns the arctangent of Y/X in the range -PI to PI. -For the tangent operation, you may use the C<POSIX::tan()> +For the tangent operation, you may use the C<Math::Trig::tan> function, or use the familiar relation: sub tan { sin($_[0]) / cos($_[0]) } @@ -421,29 +435,67 @@ function, or use the familiar relation: =item bind SOCKET,NAME Binds a network address to a socket, just as the bind system call -does. Returns TRUE if it succeeded, FALSE otherwise. NAME should be a +does. Returns true if it succeeded, false otherwise. NAME should be a packed address of the appropriate type for the socket. See the examples in L<perlipc/"Sockets: Client/Server Communication">. +=item binmode FILEHANDLE, DISCIPLINE + =item binmode FILEHANDLE -Arranges for the file to be read or written in "binary" mode in operating -systems that distinguish between binary and text files. Files that -are not in binary mode have CR LF sequences translated to LF on input -and LF translated to CR LF on output. Binmode has no effect under -many sytems, but in MS-DOS and similarly archaic systems, it may be -imperative--otherwise your MS-DOS-damaged C library may mangle your file. -The key distinction between systems that need C<binmode()> and those -that don't is their text file formats. Systems like Unix, MacOS, and -Plan9 that delimit lines with a single character, and that encode that -character in C as C<"\n">, do not need C<binmode()>. The rest may need it. -If FILEHANDLE is an expression, the value is taken as the name of the -filehandle. - -If the system does care about it, using it when you shouldn't is just as -perilous as failing to use it when you should. Fortunately for most of -us, you can't go wrong using binmode() on systems that don't care about -it, though. +Arranges for FILEHANDLE to be read or written in "binary" or "text" mode +on systems where the run-time libraries distinguish between binary and +text files. 
If FILEHANDLE is an expression, the value is taken as the +name of the filehandle. DISCIPLINE can be either of C<":raw"> for +binary mode or C<":crlf"> for "text" mode. If the DISCIPLINE is +omitted, it defaults to C<":raw">. + +binmode() should be called after open() but before any I/O is done on +the filehandle. + +On many systems binmode() currently has no effect, but in future, it +will be extended to support user-defined input and output disciplines. +On some systems binmode() is necessary when you're not working with a +text file. For the sake of portability it is a good idea to always use +it when appropriate, and to never use it when it isn't appropriate. + +In other words: Regardless of platform, use binmode() on binary +files, and do not use binmode() on text files. + +The C<open> pragma can be used to establish default disciplines. +See L<open>. + +The operating system, device drivers, C libraries, and Perl run-time +system all work together to let the programmer treat a single +character (C<\n>) as the line terminator, irrespective of the external +representation. On many operating systems, the native text file +representation matches the internal representation, but on some +platforms the external representation of C<\n> is made up of more than +one character. + +Mac OS and all variants of Unix use a single character to end each line +in the external representation of text (even though that single +character is not necessarily the same across these platforms). +Consequently binmode() has no effect on these operating systems. In +other systems like VMS, MS-DOS and the various flavors of MS-Windows +your program sees a C<\n> as a simple C<\cJ>, but what's stored in text +files are the two characters C<\cM\cJ>. That means that, if you don't +use binmode() on these systems, C<\cM\cJ> sequences on disk will be +converted to C<\n> on input, and any C<\n> in your program will be +converted back to C<\cM\cJ> on output. 
This is what you want for text +files, but it can be disastrous for binary files. + +Another consequence of using binmode() (on some systems) is that +special end-of-file markers will be seen as part of the data stream. +For systems from the Microsoft family this means that if your binary +data contains C<\cZ>, the I/O subsystem will regard it as the end of +the file, unless you use binmode(). + +binmode() is not only important for readline() and print() operations, +but also when using read(), seek(), sysread(), syswrite() and tell() +(see L<perlport> for more details). See the C<$/> and C<$\> variables +in L<perlvar> for how to manually set your input and output +line-termination sequences. =item bless REF,CLASSNAME @@ -451,7 +503,7 @@ it, though. This function tells the thingy referenced by REF that it is now an object in the CLASSNAME package. If CLASSNAME is omitted, the current package -is used. Because a C<bless()> is often the last thing in a constructor. +is used. Because a C<bless> is often the last thing in a constructor, it returns the reference for convenience. Always use the two-argument version if the function doing the blessing might be inherited by a derived class. See L<perltoot> and L<perlobj> for more about the blessing @@ -471,7 +523,7 @@ See L<perlmod/"Perl Modules">. Returns the context of the current subroutine call. In scalar context, returns the caller's package name if there is a caller, that is, if -we're in a subroutine or C<eval()> or C<require()>, and the undefined value +we're in a subroutine or C<eval> or C<require>, and the undefined value otherwise. In list context, returns ($package, $filename, $line) = caller; @@ -480,33 +532,37 @@ With EXPR, it returns some extra information that the debugger uses to print a stack trace. The value of EXPR indicates how many call frames to go back before the current one. 
- ($package, $filename, $line, $subroutine, - $hasargs, $wantarray, $evaltext, $is_require) = caller($i); + ($package, $filename, $line, $subroutine, $hasargs, + $wantarray, $evaltext, $is_require, $hints, $bitmask) = caller($i); -Here C<$subroutine> may be C<"(eval)"> if the frame is not a subroutine -call, but an C<eval()>. In such a case additional elements C<$evaltext> and +Here $subroutine may be C<(eval)> if the frame is not a subroutine +call, but an C<eval>. In such a case additional elements $evaltext and C<$is_require> are set: C<$is_require> is true if the frame is created by a -C<require> or C<use> statement, C<$evaltext> contains the text of the +C<require> or C<use> statement, $evaltext contains the text of the C<eval EXPR> statement. In particular, for a C<eval BLOCK> statement, -C<$filename> is C<"(eval)">, but C<$evaltext> is undefined. (Note also that +$filename is C<(eval)>, but $evaltext is undefined. (Note also that each C<use> statement creates a C<require> frame inside an C<eval EXPR>) -frame. +frame. C<$hints> and C<$bitmask> contain pragmatic hints that the caller +was compiled with. The C<$hints> and C<$bitmask> values are subject to +change between versions of Perl, and are not meant for external use. Furthermore, when called from within the DB package, caller returns more detailed information: it sets the list variable C<@DB::args> to be the arguments with which the subroutine was invoked. Be aware that the optimizer might have optimized call frames away before -C<caller()> had a chance to get the information. That means that C<caller(N)> +C<caller> had a chance to get the information. That means that C<caller(N)> might not return information about the call frame you expect it do, for -C<N E<gt> 1>. In particular, C<@DB::args> might have information from the -previous time C<caller()> was called. +C<< N > 1 >>. In particular, C<@DB::args> might have information from the +previous time C<caller> was called. 
=item chdir EXPR Changes the working directory to EXPR, if possible. If EXPR is omitted, -changes to the user's home directory. Returns TRUE upon success, -FALSE otherwise. See the example under C<die()>. +changes to the directory specified by C<$ENV{HOME}>, if set; if not, +changes to the directory specified by C<$ENV{LOGDIR}>. If neither is +set, C<chdir> does nothing. It returns true upon success, false +otherwise. See the example under C<die>. =item chmod LIST @@ -523,6 +579,14 @@ successfully changed. See also L</oct>, if all you have is a string. $mode = '0644'; chmod oct($mode), 'foo'; # this is better $mode = 0644; chmod $mode, 'foo'; # this is best +You can also import the symbolic C<S_I*> constants from the Fcntl +module: + + use Fcntl ':mode'; + + chmod S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH, @executables; + # This is identical to the chmod 0755 of the above example. + =item chomp VARIABLE =item chomp LIST @@ -536,6 +600,9 @@ number of characters removed from all its arguments. It's often used to remove the newline from the end of an input record when you're worried that the final record may be missing its newline. When in paragraph mode (C<$/ = "">), it removes all trailing newlines from the string. +When in slurp mode (C<$/ = undef>) or fixed-length record mode (C<$/> is +a reference to an integer or the like, see L<perlvar>) chomp() won't +remove anything. If VARIABLE is omitted, it chomps C<$_>. Example: while (<>) { @@ -576,16 +643,18 @@ You can actually chop anything that's an lvalue, including an assignment: chop($answer = <STDIN>); If you chop a list, each element is chopped. Only the value of the -last C<chop()> is returned. +last C<chop> is returned. -Note that C<chop()> returns the last character. To return all but the last +Note that C<chop> returns the last character. To return all but the last character, use C<substr($string, 0, -1)>. =item chown LIST Changes the owner (and group) of a list of files. 
The first two -elements of the list must be the I<NUMERICAL> uid and gid, in that order. -Returns the number of files successfully changed. +elements of the list must be the I<numeric> uid and gid, in that +order. A value of -1 in either position is interpreted by most +systems to leave that value unchanged. Returns the number of files +successfully changed. $cnt = chown $uid, $gid, 'foo', 'bar'; chown $uid, $gid, @filenames; @@ -593,9 +662,9 @@ Returns the number of files successfully changed. Here's an example that looks up nonnumeric uids in the passwd file: print "User: "; - chop($user = <STDIN>); + chomp($user = <STDIN>); print "Files: "; - chop($pattern = <STDIN>); + chomp($pattern = <STDIN>); ($login,$pass,$uid,$gid) = getpwnam($user) or die "$user not in passwd file"; @@ -607,13 +676,20 @@ On most systems, you are not allowed to change the ownership of the file unless you're the superuser, although you should be able to change the group to any of your secondary groups. On insecure systems, these restrictions may be relaxed, but this is not a portable assumption. +On POSIX systems, you can detect this condition this way: + + use POSIX qw(sysconf _PC_CHOWN_RESTRICTED); + $can_chown_giveaway = not sysconf(_PC_CHOWN_RESTRICTED); =item chr NUMBER =item chr Returns the character represented by that NUMBER in the character set. -For example, C<chr(65)> is C<"A"> in ASCII. For the reverse, use L</ord>. +For example, C<chr(65)> is C<"A"> in either ASCII or Unicode, and +chr(0x263a) is a Unicode smiley face (but only within the scope of +a C<use utf8>). For the reverse, use L</ord>. +See L<utf8> for more about Unicode. If NUMBER is omitted, uses C<$_>. @@ -623,33 +699,38 @@ If NUMBER is omitted, uses C<$_>. This function works like the system call by the same name: it makes the named directory the new root directory for all further pathnames that -begin with a C<"/"> by your process and all its children. 
(It doesn't +begin with a C</> by your process and all its children. (It doesn't change your current working directory, which is unaffected.) For security reasons, this call is restricted to the superuser. If FILENAME is -omitted, does a C<chroot()> to C<$_>. +omitted, does a C<chroot> to C<$_>. =item close FILEHANDLE =item close -Closes the file or pipe associated with the file handle, returning TRUE +Closes the file or pipe associated with the file handle, returning true only if stdio successfully flushes buffers and closes the system file -descriptor. Closes the currently selected filehandle if the argument +descriptor. Closes the currently selected filehandle if the argument is omitted. You don't have to close FILEHANDLE if you are immediately going to do -another C<open()> on it, because C<open()> will close it for you. (See -C<open()>.) However, an explicit C<close()> on an input file resets the line -counter (C<$.>), while the implicit close done by C<open()> does not. +another C<open> on it, because C<open> will close it for you. (See +C<open>.) However, an explicit C<close> on an input file resets the line +counter (C<$.>), while the implicit close done by C<open> does not. -If the file handle came from a piped open C<close()> will additionally -return FALSE if one of the other system calls involved fails or if the +If the file handle came from a piped open C<close> will additionally +return false if one of the other system calls involved fails or if the program exits with non-zero status. (If the only problem was that the program exited non-zero C<$!> will be set to C<0>.) Closing a pipe also waits for the process executing on the pipe to complete, in case you want to look at the output of the pipe afterwards, and implicitly puts the exit status value of that command into C<$?>. +Prematurely closing the read end of a pipe (i.e. before the process +writing to it at the other end has closed it) will result in a +SIGPIPE being delivered to the writer. 
If the other end can't +handle that, be sure to read all the data before closing the pipe. + Example: open(OUTPUT, '|sort >foo') # pipe to sort @@ -666,7 +747,7 @@ filehandle, usually the real filehandle name. =item closedir DIRHANDLE -Closes a directory opened by C<opendir()> and returns the success of that +Closes a directory opened by C<opendir> and returns the success of that system call. DIRHANDLE may be an expression whose value can be used as an indirect @@ -675,7 +756,7 @@ dirhandle, usually the real dirhandle name. =item connect SOCKET,NAME Attempts to connect to a remote socket, just as the connect system call -does. Returns TRUE if it succeeded, FALSE otherwise. NAME should be a +does. Returns true if it succeeded, false otherwise. NAME should be a packed address of the appropriate type for the socket. See the examples in L<perlipc/"Sockets: Client/Server Communication">. @@ -690,8 +771,8 @@ continued via the C<next> statement (which is similar to the C C<continue> statement). C<last>, C<next>, or C<redo> may appear within a C<continue> -block. C<last> and C<redo> will behave as if they had been executed within -the main block. So will C<next>, but since it will execute a C<continue> +block. C<last> and C<redo> will behave as if they had been executed within +the main block. So will C<next>, but since it will execute a C<continue> block, it may be more entertaining. while (EXPR) { @@ -705,7 +786,7 @@ block, it may be more entertaining. ### last always comes here Omitting the C<continue> section is semantically equivalent to using an -empty one, logically enough. In that case, C<next> goes directly back +empty one, logically enough. In that case, C<next> goes directly back to check the condition at the top of the loop. =item cos EXPR @@ -713,7 +794,7 @@ to check the condition at the top of the loop. Returns the cosine of EXPR (expressed in radians). If EXPR is omitted, takes cosine of C<$_>. 
-For the inverse cosine operation, you may use the C<POSIX::acos()> +For the inverse cosine operation, you may use the C<Math::Trig::acos()> function, or use this relation: sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) } @@ -726,14 +807,14 @@ extirpated as a potential munition). This can prove useful for checking the password file for lousy passwords, amongst other things. Only the guys wearing white hats should do this. -Note that C<crypt()> is intended to be a one-way function, much like breaking +Note that C<crypt> is intended to be a one-way function, much like breaking eggs to make an omelette. There is no (known) corresponding decrypt function. As a result, this function isn't all that useful for cryptography. (For that, see your nearby CPAN mirror.) When verifying an existing encrypted string you should use the encrypted text as the salt (like C<crypt($plain, $crypted) eq $crypted>). This -allows your code to work with the standard C<crypt()> and with more +allows your code to work with the standard C<crypt> and with more exotic implementations. When choosing a new salt create a random two character string whose characters come from the set C<[./0-9A-Za-z]> (like C<join '', ('.', '/', 0..9, 'A'..'Z', 'a'..'z')[rand 64, rand 64]>). @@ -758,34 +839,40 @@ their own password: Of course, typing in your own password to whoever asks you for it is unwise. +The C<crypt> function is unsuitable for encrypting large quantities +of data, not least of all because you can't get the information +back. Look at the F<by-module/Crypt> and F<by-module/PGP> directories +on your favorite CPAN mirror for a slew of potentially useful +modules. + =item dbmclose HASH -[This function has been largely superseded by the C<untie()> function.] +[This function has been largely superseded by the C<untie> function.] Breaks the binding between a DBM file and a hash. 
-=item dbmopen HASH,DBNAME,MODE +=item dbmopen HASH,DBNAME,MASK -[This function has been largely superseded by the C<tie()> function.] +[This function has been largely superseded by the C<tie> function.] This binds a dbm(3), ndbm(3), sdbm(3), gdbm(3), or Berkeley DB file to a -hash. HASH is the name of the hash. (Unlike normal C<open()>, the first -argument is I<NOT> a filehandle, even though it looks like one). DBNAME +hash. HASH is the name of the hash. (Unlike normal C<open>, the first +argument is I<not> a filehandle, even though it looks like one). DBNAME is the name of the database (without the F<.dir> or F<.pag> extension if any). If the database does not exist, it is created with protection -specified by MODE (as modified by the C<umask()>). If your system supports -only the older DBM functions, you may perform only one C<dbmopen()> in your +specified by MASK (as modified by the C<umask>). If your system supports +only the older DBM functions, you may perform only one C<dbmopen> in your program. In older versions of Perl, if your system had neither DBM nor -ndbm, calling C<dbmopen()> produced a fatal error; it now falls back to +ndbm, calling C<dbmopen> produced a fatal error; it now falls back to sdbm(3). If you don't have write access to the DBM file, you can only read hash variables, not set them. If you want to test whether you can write, -either use file tests or try setting a dummy hash entry inside an C<eval()>, +either use file tests or try setting a dummy hash entry inside an C<eval>, which will trap the error. -Note that functions such as C<keys()> and C<values()> may return huge lists -when used on large DBM files. You may prefer to use the C<each()> +Note that functions such as C<keys> and C<values> may return huge lists +when used on large DBM files. You may prefer to use the C<each> function to iterate over large DBM files. Example: # print out history file offsets @@ -820,14 +907,21 @@ conditions. 
This function allows you to distinguish C<undef> from other values. (A simple Boolean test will not distinguish among C<undef>, zero, the empty string, and C<"0">, which are all equally false.) Note that since C<undef> is a valid scalar, its presence -doesn't I<necessarily> indicate an exceptional condition: C<pop()> +doesn't I<necessarily> indicate an exceptional condition: C<pop> returns C<undef> when its argument is an empty array, I<or> when the element to return happens to be C<undef>. -You may also use C<defined()> to check whether a subroutine exists, by -saying C<defined &func> without parentheses. On the other hand, use -of C<defined()> upon aggregates (hashes and arrays) is not guaranteed to -produce intuitive results, and should probably be avoided. +You may also use C<defined(&func)> to check whether subroutine C<&func> +has ever been defined. The return value is unaffected by any forward +declarations of C<&func>. + +Use of C<defined> on aggregates (hashes and arrays) is deprecated. It +used to report whether memory for that aggregate has ever been +allocated. This behavior may disappear in future versions of Perl. +You should instead use a simple test for size: + + if (@an_array) { print "has array elements\n" } + if (%a_hash) { print "has hash members\n" } When used on a hash element, it tells you whether the value is defined, not whether the key exists in the hash. Use L</exists> for the latter @@ -842,7 +936,7 @@ Examples: sub foo { defined &$bar ? &$bar(@_) : die "No bar"; } $debugging = 0 unless defined $debugging; -Note: Many folks tend to overuse C<defined()>, and then are surprised to +Note: Many folks tend to overuse C<defined>, and then are surprised to discover that the number C<0> and C<""> (the zero-length string) are, in fact, defined values. For example, if you say @@ -853,69 +947,75 @@ matched "nothing". But it didn't really match nothing--rather, it matched something that happened to be zero characters long. 
This is all very above-board and honest. When a function returns an undefined value, it's an admission that it couldn't give you an honest answer. So you -should use C<defined()> only when you're questioning the integrity of what +should use C<defined> only when you're questioning the integrity of what you're trying to do. At other times, a simple comparison to C<0> or C<""> is what you want. -Currently, using C<defined()> on an entire array or hash reports whether -memory for that aggregate has ever been allocated. So an array you set -to the empty list appears undefined initially, and one that once was full -and that you then set to the empty list still appears defined. You -should instead use a simple test for size: - - if (@an_array) { print "has array elements\n" } - if (%a_hash) { print "has hash members\n" } - -Using C<undef()> on these, however, does clear their memory and then report -them as not defined anymore, but you shouldn't do that unless you don't -plan to use them again, because it saves time when you load them up -again to have memory already ready to be filled. The normal way to -free up space used by an aggregate is to assign the empty list. - -This counterintuitive behavior of C<defined()> on aggregates may be -changed, fixed, or broken in a future release of Perl. - See also L</undef>, L</exists>, L</ref>. =item delete EXPR -Deletes the specified key(s) and their associated values from a hash. -For each key, returns the deleted value associated with that key, or -the undefined value if there was no such key. Deleting from C<$ENV{}> -modifies the environment. Deleting from a hash tied to a DBM file -deletes the entry from the DBM file. (But deleting from a C<tie()>d hash -doesn't necessarily return anything.) +Given an expression that specifies a hash element, array element, hash slice, +or array slice, deletes the specified element(s) from the hash or array. 
+In the case of an array, if the array elements happen to be at the end, +the size of the array will shrink to the highest element that tests +true for exists() (or 0 if no such element exists). + +Returns each element so deleted or the undefined value if there was no such +element. Deleting from C<$ENV{}> modifies the environment. Deleting from +a hash tied to a DBM file deletes the entry from the DBM file. Deleting +from a C<tie>d hash or array may not necessarily return anything. -The following deletes all the values of a hash: +Deleting an array element effectively returns that position of the array +to its initial, uninitialized state. Subsequently testing for the same +element with exists() will return false. Note that deleting array +elements in the middle of an array will not shift the index of the ones +after them down--use splice() for that. See L</exists>. + +The following (inefficiently) deletes all the values of %HASH and @ARRAY: foreach $key (keys %HASH) { delete $HASH{$key}; } -And so does this: + foreach $index (0 .. $#ARRAY) { + delete $ARRAY[$index]; + } - delete @HASH{keys %HASH} +And so do these: + + delete @HASH{keys %HASH}; + + delete @ARRAY[0 .. 
$#ARRAY]; But both of these are slower than just assigning the empty list -or undefining it: +or undefining %HASH or @ARRAY: - %hash = (); # completely empty %hash - undef %hash; # forget %hash every existed + %HASH = (); # completely empty %HASH + undef %HASH; # forget %HASH ever existed + + @ARRAY = (); # completely empty @ARRAY + undef @ARRAY; # forget @ARRAY ever existed Note that the EXPR can be arbitrarily complicated as long as the final -operation is a hash element lookup or hash slice: +operation is a hash element, array element, hash slice, or array slice +lookup: delete $ref->[$x][$y]{$key}; delete @{$ref->[$x][$y]}{$key1, $key2, @morekeys}; + delete $ref->[$x][$y][$index]; + delete @{$ref->[$x][$y]}[$index1, $index2, @moreindices]; + =item die LIST -Outside an C<eval()>, prints the value of LIST to C<STDERR> and exits with -the current value of C<$!> (errno). If C<$!> is C<0>, exits with the value of -C<($? E<gt>E<gt> 8)> (backtick `command` status). If C<($? E<gt>E<gt> 8)> -is C<0>, exits with C<255>. Inside an C<eval(),> the error message is stuffed into -C<$@> and the C<eval()> is terminated with the undefined value. This makes -C<die()> the way to raise an exception. +Outside an C<eval>, prints the value of LIST to C<STDERR> and +exits with the current value of C<$!> (errno). If C<$!> is C<0>, +exits with the value of C<<< ($? >> 8) >>> (backtick `command` +status). If C<<< ($? >> 8) >>> is C<0>, exits with C<255>. Inside +an C<eval>, the error message is stuffed into C<$@> and the +C<eval> is terminated with the undefined value. This makes +C<die> the way to raise an exception. Equivalent examples: @@ -969,25 +1069,26 @@ regular expressions. Here's an example: } } -Since perl will stringify uncaught exception messages before displaying +Because perl will stringify uncaught exception messages before displaying them, you may want to overload stringification operations on such custom exception objects. See L<overload> for details about that. 
-You can arrange for a callback to be run just before the C<die()> does -its deed, by setting the C<$SIG{__DIE__}> hook. The associated handler -will be called with the error text and can change the error message, if -it sees fit, by calling C<die()> again. See L<perlvar/$SIG{expr}> for details on -setting C<%SIG> entries, and L<"eval BLOCK"> for some examples. - -Note that the C<$SIG{__DIE__}> hook is currently called even inside -eval()ed blocks/strings! If one wants the hook to do nothing in such -situations, put +You can arrange for a callback to be run just before the C<die> +does its deed, by setting the C<$SIG{__DIE__}> hook. The associated +handler will be called with the error text and can change the error +message, if it sees fit, by calling C<die> again. See +L<perlvar/$SIG{expr}> for details on setting C<%SIG> entries, and +L<"eval BLOCK"> for some examples. Although this feature was meant +to be run only right before your program was to exit, this is not +currently the case--the C<$SIG{__DIE__}> hook is currently called +even inside eval()ed blocks/strings! If one wants the hook to do +nothing in such situations, put die @_ if $^S; -as the first line of the handler (see L<perlvar/$^S>). Because this -promotes action at a distance, this counterintuitive behavior may be fixed -in a future release. +as the first line of the handler (see L<perlvar/$^S>). Because +this promotes strange action at a distance, this counterintuitive +behavior may be fixed in a future release. =item do BLOCK @@ -1031,7 +1132,7 @@ successfully compiled, C<do> returns the value of the last expression evaluated. Note that inclusion of library modules is better done with the -C<use()> and C<require()> operators, which also do automatic error checking +C<use> and C<require> operators, which also do automatic error checking and raise an exception if there's a problem. You might like to use C<do> to read in a program configuration @@ -1052,60 +1153,49 @@ file. 
Manual error checking can be done this way: =item dump -This causes an immediate core dump. Primarily this is so that you can -use the B<undump> program to turn your core dump into an executable binary -after having initialized all your variables at the beginning of the -program. When the new binary is executed it will begin by executing a -C<goto LABEL> (with all the restrictions that C<goto> suffers). Think of -it as a goto with an intervening core dump and reincarnation. If C<LABEL> -is omitted, restarts the program from the top. WARNING: Any files -opened at the time of the dump will NOT be open any more when the -program is reincarnated, with possible resulting confusion on the part -of Perl. See also B<-u> option in L<perlrun>. - -Example: - - #!/usr/bin/perl - require 'getopt.pl'; - require 'stat.pl'; - %days = ( - 'Sun' => 1, - 'Mon' => 2, - 'Tue' => 3, - 'Wed' => 4, - 'Thu' => 5, - 'Fri' => 6, - 'Sat' => 7, - ); - - dump QUICKSTART if $ARGV[0] eq '-d'; - - QUICKSTART: - Getopt('f'); - -This operator is largely obsolete, partly because it's very hard to -convert a core file into an executable, and because the real perl-to-C -compiler has superseded it. +This function causes an immediate core dump. See also the B<-u> +command-line switch in L<perlrun>, which does the same thing. +Primarily this is so that you can use the B<undump> program (not +supplied) to turn your core dump into an executable binary after +having initialized all your variables at the beginning of the +program. When the new binary is executed it will begin by executing +a C<goto LABEL> (with all the restrictions that C<goto> suffers). +Think of it as a goto with an intervening core dump and reincarnation. +If C<LABEL> is omitted, restarts the program from the top. + +B<WARNING>: Any files opened at the time of the dump will I<not> +be open any more when the program is reincarnated, with possible +resulting confusion on the part of Perl. 
+ +This function is now largely obsolete, partly because it's very +hard to convert a core file into an executable, and because the +real compiler backends for generating portable bytecode and compilable +C code have superseded it. + +If you're looking to use L<dump> to speed up your program, consider +generating bytecode or native C code as described in L<perlcc>. If +you're just trying to accelerate a CGI script, consider using the +C<mod_perl> extension to B<Apache>, or the CPAN module, Fast::CGI. +You might also consider autoloading or selfloading, which at least +make your program I<appear> to run faster. =item each HASH When called in list context, returns a 2-element list consisting of the key and value for the next element of a hash, so that you can iterate over it. When called in scalar context, returns the key for only the "next" -element in the hash. (Note: Keys may be C<"0"> or C<"">, which are logically -false; you may wish to avoid constructs like C<while ($k = each %foo) {}> -for this reason.) +element in the hash. Entries are returned in an apparently random order. The actual random order is subject to change in future versions of perl, but it is guaranteed -to be in the same order as either the C<keys()> or C<values()> function +to be in the same order as either the C<keys> or C<values> function would produce on the same (unmodified) hash. When the hash is entirely read, a null array is returned in list context -(which when assigned produces a FALSE (C<0>) value), and C<undef> in -scalar context. The next call to C<each()> after that will start iterating -again. There is a single iterator for each hash, shared by all C<each()>, -C<keys()>, and C<values()> function calls in the program; it can be reset by +(which when assigned produces a false (C<0>) value), and C<undef> in +scalar context. The next call to C<each> after that will start iterating +again. 
There is a single iterator for each hash, shared by all C<each>, +C<keys>, and C<values> function calls in the program; it can be reset by reading all the elements from the hash, or by evaluating C<keys HASH> or C<values HASH>. If you add or delete elements of a hash while you're iterating over it, you may get entries skipped or duplicated, so don't. @@ -1117,7 +1207,7 @@ only in a different order: print "$key=$value\n"; } -See also C<keys()>, C<values()> and C<sort()>. +See also C<keys>, C<values> and C<sort>. =item eof FILEHANDLE @@ -1128,17 +1218,22 @@ See also C<keys()>, C<values()> and C<sort()>. Returns 1 if the next read on FILEHANDLE will return end of file, or if FILEHANDLE is not open. FILEHANDLE may be an expression whose value gives the real filehandle. (Note that this function actually -reads a character and then C<ungetc()>s it, so isn't very useful in an +reads a character and then C<ungetc>s it, so isn't very useful in an interactive context.) Do not read from a terminal file (or call -C<eof(FILEHANDLE)> on it) after end-of-file is reached. Filetypes such +C<eof(FILEHANDLE)> on it) after end-of-file is reached. File types such as terminals may lose the end-of-file condition if you do. -An C<eof> without an argument uses the last file read as argument. -Using C<eof()> with empty parentheses is very different. It indicates -the pseudo file formed of the files listed on the command line, i.e., -C<eof()> is reasonable to use inside a C<while (E<lt>E<gt>)> loop to -detect the end of only the last file. Use C<eof(ARGV)> or eof without the -parentheses to test I<EACH> file in a while (E<lt>E<gt>) loop. Examples: +An C<eof> without an argument uses the last file read. Using C<eof()> +with empty parentheses is very different. It refers to the pseudo file +formed from the files listed on the command line and accessed via the +C<< <> >> operator. 
Since C<< <> >> isn't explicitly opened, +as a normal filehandle is, an C<eof()> before C<< <> >> has been +used will cause C<@ARGV> to be examined to determine if input is +available. + +In a C<< while (<>) >> loop, C<eof> or C<eof(ARGV)> can be used to +detect the end of each file, C<eof()> will only detect the end of the +last file. Examples: # reset line numbering on each input file while (<>) { @@ -1159,8 +1254,8 @@ parentheses to test I<EACH> file in a while (E<lt>E<gt>) loop. Examples: } Practical hint: you almost never need to use C<eof> in Perl, because the -input operators return false values when they run out of data, or if there -was an error. +input operators typically return C<undef> when they run out of data, or if +there was an error. =item eval EXPR @@ -1191,16 +1286,16 @@ as with subroutines. The expression providing the return value is evaluated in void, scalar, or list context, depending on the context of the eval itself. See L</wantarray> for more on how the evaluation context can be determined. -If there is a syntax error or runtime error, or a C<die()> statement is -executed, an undefined value is returned by C<eval()>, and C<$@> is set to the +If there is a syntax error or runtime error, or a C<die> statement is +executed, an undefined value is returned by C<eval>, and C<$@> is set to the error message. If there was no error, C<$@> is guaranteed to be a null -string. Beware that using C<eval()> neither silences perl from printing +string. Beware that using C<eval> neither silences perl from printing warnings to STDERR, nor does it stuff the text of warning messages into C<$@>. To do either of those, you have to use the C<$SIG{__WARN__}> facility. See L</warn> and L<perlvar>. 
-Note that, because C<eval()> traps otherwise-fatal errors, it is useful for -determining whether a particular feature (such as C<socket()> or C<symlink()>) +Note that, because C<eval> traps otherwise-fatal errors, it is useful for +determining whether a particular feature (such as C<socket> or C<symlink>) is implemented. It is also Perl's exception trapping mechanism, where the die operator is used to raise exceptions. @@ -1232,7 +1327,7 @@ as shown in this example: warn $@ if $@; This is especially significant, given that C<__DIE__> hooks can call -C<die()> again, which has the effect of changing their error messages: +C<die> again, which has the effect of changing their error messages: # __DIE__ hooks may modify error messages { @@ -1242,10 +1337,10 @@ C<die()> again, which has the effect of changing their error messages: print $@ if $@; # prints "bar lives here" } -Because this promotes action at a distance, this counterintuive behavior +Because this promotes action at a distance, this counterintuitive behavior may be fixed in a future release. -With an C<eval()>, you should be especially careful to remember what's +With an C<eval>, you should be especially careful to remember what's being looked at when: eval $x; # CASE 1 @@ -1258,13 +1353,13 @@ being looked at when: $$x++; # CASE 6 Cases 1 and 2 above behave identically: they run the code contained in -the variable C<$x>. (Although case 2 has misleading double quotes making +the variable $x. (Although case 2 has misleading double quotes making the reader wonder what else might be happening (nothing is).) Cases 3 and 4 likewise behave in the same way: they run the code C<'$x'>, which -does nothing but return the value of C<$x>. (Case 4 is preferred for +does nothing but return the value of $x. (Case 4 is preferred for purely visual reasons, but it also has the advantage of compiling at compile-time instead of at run-time.) 
Case 5 is a place where -normally you I<WOULD> like to use double quotes, except that in this +normally you I<would> like to use double quotes, except that in this particular situation, you can just use symbolic references instead, as in case 6. @@ -1275,15 +1370,15 @@ C<next>, C<last>, or C<redo> cannot be used to leave or restart the block. =item exec PROGRAM LIST -The C<exec()> function executes a system command I<AND NEVER RETURNS> - -use C<system()> instead of C<exec()> if you want it to return. It fails and -returns FALSE only if the command does not exist I<and> it is executed +The C<exec> function executes a system command I<and never returns>-- +use C<system> instead of C<exec> if you want it to return. It fails and +returns false only if the command does not exist I<and> it is executed directly instead of via your system's command shell (see below). -Since it's a common mistake to use C<exec()> instead of C<system()>, Perl -warns you if there is a following statement which isn't C<die()>, C<warn()>, -or C<exit()> (if C<-w> is set - but you always do that). If you -I<really> want to follow an C<exec()> with some other statement, you +Since it's a common mistake to use C<exec> instead of C<system>, Perl +warns you if there is a following statement which isn't C<die>, C<warn>, +or C<exit> (if C<-w> is set - but you always do that). If you +I<really> want to follow an C<exec> with some other statement, you can use one of these styles to avoid the warning: exec ('foo') or print STDERR "couldn't exec foo: $!"; @@ -1296,9 +1391,8 @@ the argument is checked for shell metacharacters, and if there are any, the entire argument is passed to the system's command shell for parsing (this is C</bin/sh -c> on Unix platforms, but varies on other platforms). If there are no shell metacharacters in the argument, it is split into -words and passed directly to C<execvp()>, which is more efficient. 
Note: -C<exec()> and C<system()> do not flush your output buffer, so you may need to -set C<$|> to avoid lost output. Examples: +words and passed directly to C<execvp>, which is more efficient. +Examples: exec '/bin/echo', 'Your arguments are: ', @ARGV; exec "sort $outfile | uniq"; @@ -1321,10 +1415,11 @@ When the arguments get executed via the system shell, results will be subject to its quirks and capabilities. See L<perlop/"`STRING`"> for details. -Using an indirect object with C<exec()> or C<system()> is also more secure. -This usage forces interpretation of the arguments as a multivalued list, -even if the list had just one argument. That way you're safe from the -shell expanding wildcards or splitting up words with whitespace in them. +Using an indirect object with C<exec> or C<system> is also more +secure. This usage (which also works fine with system()) forces +interpretation of the arguments as a multivalued list, even if the +list had just one argument. That way you're safe from the shell +expanding wildcards or splitting up words with whitespace in them. @args = ( "echo surprise" ); @@ -1337,32 +1432,57 @@ program, passing it C<"surprise"> an argument. The second version didn't--it tried to run a program literally called I<"echo surprise">, didn't find it, and set C<$?> to a non-zero value indicating failure. -Note that C<exec()> will not call your C<END> blocks, nor will it call +Beginning with v5.6.0, Perl will attempt to flush all files opened for +output before the exec, but this may not be supported on some platforms +(see L<perlport>). To be safe, you may need to set C<$|> ($AUTOFLUSH +in English) or call the C<autoflush()> method of C<IO::Handle> on any +open handles in order to avoid lost output. + +Note that C<exec> will not call your C<END> blocks, nor will it call any C<DESTROY> methods in your objects. =item exists EXPR -Returns TRUE if the specified hash key exists in its hash array, even -if the corresponding value is undefined. 
+Given an expression that specifies a hash element or array element, +returns true if the specified element in the hash or array has ever +been initialized, even if the corresponding value is undefined. The +element is not autovivified if it doesn't exist. - print "Exists\n" if exists $array{$key}; - print "Defined\n" if defined $array{$key}; - print "True\n" if $array{$key}; + print "Exists\n" if exists $hash{$key}; + print "Defined\n" if defined $hash{$key}; + print "True\n" if $hash{$key}; -A hash element can be TRUE only if it's defined, and defined if + print "Exists\n" if exists $array[$index]; + print "Defined\n" if defined $array[$index]; + print "True\n" if $array[$index]; + +A hash or array element can be true only if it's defined, and defined if it exists, but the reverse doesn't necessarily hold true. +Given an expression that specifies the name of a subroutine, +returns true if the specified subroutine has ever been declared, even +if it is undefined. Mentioning a subroutine name for exists or defined +does not count as declaring it. + + print "Exists\n" if exists &subroutine; + print "Defined\n" if defined &subroutine; + Note that the EXPR can be arbitrarily complicated as long as the final -operation is a hash key lookup: +operation is a hash or array key lookup or subroutine name: if (exists $ref->{A}->{B}->{$key}) { } if (exists $hash{A}{B}{$key}) { } -Although the last element will not spring into existence just because -its existence was tested, intervening ones will. Thus C<$ref-E<gt>{"A"}> -and C<$ref-E<gt>{"A"}-E<gt>{"B"}> will spring into existence due to the -existence test for a $key element. This happens anywhere the arrow -operator is used, including even + if (exists $ref->{A}->{B}->[$ix]) { } + if (exists $hash{A}{B}[$ix]) { } + + if (exists &{$ref->{A}{B}{$key}}) { } + +Although the deepest nested array or hash will not spring into existence +just because its existence was tested, any intervening ones will. 
+Thus C<< $ref->{"A"} >> and C<< $ref->{"A"}->{"B"} >> will spring +into existence due to the existence test for the $key element above. +This happens anywhere the arrow operator is used, including even: undef $ref; if (exists $ref->{"Some key"}) { } @@ -1372,6 +1492,15 @@ This surprising autovivification in what does not at first--or even second--glance appear to be an lvalue context may be fixed in a future release. +See L<perlref/"Pseudo-hashes: Using an array as a hash"> for specifics +on how exists() acts when used on a pseudo-hash. + +Use of a subroutine call, rather than a subroutine name, as an argument +to exists() is an error. + + exists &sub; # OK + exists &sub(); # Error + =item exit EXPR Evaluates EXPR and exits immediately with that value. Example: @@ -1379,23 +1508,23 @@ Evaluates EXPR and exits immediately with that value. Example: $ans = <STDIN>; exit 0 if $ans =~ /^[Xx]/; -See also C<die()>. If EXPR is omitted, exits with C<0> status. The only +See also C<die>. If EXPR is omitted, exits with C<0> status. The only universally recognized values for EXPR are C<0> for success and C<1> for error; other values are subject to interpretation depending on the environment in which the Perl program is running. For example, exiting 69 (EX_UNAVAILABLE) from a I<sendmail> incoming-mail filter will cause the mailer to return the item undelivered, but that's not true everywhere. -Don't use C<exit()> to abort a subroutine if there's any chance that -someone might want to trap whatever error happened. Use C<die()> instead, -which can be trapped by an C<eval()>. +Don't use C<exit> to abort a subroutine if there's any chance that +someone might want to trap whatever error happened. Use C<die> instead, +which can be trapped by an C<eval>. -The exit() function does not always exit immediately. It calls any +The exit() function does not always exit immediately. It calls any defined C<END> routines first, but these C<END> routines may not -themselves abort the exit. 
Likewise any object destructors that need to +themselves abort the exit. Likewise any object destructors that need to be called are called before the real exit. If this is a problem, you can call C<POSIX::_exit($status)> to avoid END and destructor processing. -See L<perlsub> for details. +See L<perlmod> for details. =item exp EXPR @@ -1411,20 +1540,20 @@ Implements the fcntl(2) function. You'll probably have to say use Fcntl; first to get the correct constant definitions. Argument processing and -value return works just like C<ioctl()> below. +value return works just like C<ioctl> below. For example: use Fcntl; fcntl($filehandle, F_GETFL, $packed_return_buffer) or die "can't fcntl F_GETFL: $!"; -You don't have to check for C<defined()> on the return from C<fnctl()>. -Like C<ioctl()>, it maps a C<0> return from the system call into "C<0> -but true" in Perl. This string is true in boolean context and C<0> +You don't have to check for C<defined> on the return from C<fcntl>. +Like C<ioctl>, it maps a C<0> return from the system call into +C<"0 but true"> in Perl. This string is true in boolean context and C<0> in numeric context. It is also exempt from the normal B<-w> warnings on improper numeric conversions. -Note that C<fcntl()> will produce a fatal error if used on a machine that +Note that C<fcntl> will produce a fatal error if used on a machine that doesn't implement fcntl(2). See the Fcntl module or your fcntl(2) manpage to learn what functions are available on your system. @@ -1432,7 +1561,7 @@ manpage to learn what functions are available on your system. Returns the file descriptor for a filehandle, or undefined if the filehandle is not open. This is mainly useful for constructing -bitmaps for C<select()> and low-level POSIX tty-handling operations. +bitmaps for C<select> and low-level POSIX tty-handling operations. If FILEHANDLE is an expression, the value is taken as an indirect filehandle, generally its name. 
@@ -1445,17 +1574,17 @@ same underlying descriptor: =item flock FILEHANDLE,OPERATION -Calls flock(2), or an emulation of it, on FILEHANDLE. Returns TRUE -for success, FALSE on failure. Produces a fatal error if used on a +Calls flock(2), or an emulation of it, on FILEHANDLE. Returns true +for success, false on failure. Produces a fatal error if used on a machine that doesn't implement flock(2), fcntl(2) locking, or lockf(3). -C<flock()> is Perl's portable file locking interface, although it locks +C<flock> is Perl's portable file locking interface, although it locks only entire files, not records. Two potentially non-obvious but traditional C<flock> semantics are that it waits indefinitely until the lock is granted, and that its locks are B<merely advisory>. Such discretionary locks are more flexible, but offer -fewer guarantees. This means that files locked with C<flock()> may be -modified by programs that do not also use C<flock()>. See L<perlport>, +fewer guarantees. This means that files locked with C<flock> may be +modified by programs that do not also use C<flock>. See L<perlport>, your port's specific documentation, or your system-specific local manpages for details. It's best to assume traditional behavior if you're writing portable programs. (But if you're not, you should as always feel perfectly @@ -1465,11 +1594,11 @@ in the way of your getting your job done.) OPERATION is one of LOCK_SH, LOCK_EX, or LOCK_UN, possibly combined with LOCK_NB. These constants are traditionally valued 1, 2, 8 and 4, but -you can use the symbolic names if import them from the Fcntl module, +you can use the symbolic names if you import them from the Fcntl module, either individually, or as a group using the ':flock' tag. LOCK_SH requests a shared lock, LOCK_EX requests an exclusive lock, and LOCK_UN -releases a previously requested lock. 
If LOCK_NB is bitwise-or'ed with +LOCK_SH or LOCK_EX then C<flock> will return immediately rather than blocking waiting for the lock (check the return status to see if you got it). To avoid the possibility of miscoordination, Perl now flushes FILEHANDLE @@ -1481,8 +1610,8 @@ are the semantics that lockf(3) implements. Most if not all systems implement lockf(3) in terms of fcntl(2) locking, though, so the differing semantics shouldn't bite too many people. -Note also that some versions of C<flock()> cannot lock things over the -network; you would need to use the more system-specific C<fcntl()> for +Note also that some versions of C<flock> cannot lock things over the +network; you would need to use the more system-specific C<fcntl> for that. If you like you can force Perl to ignore your system's flock(2) function, and so provide its own fcntl(2)-based emulation, by passing the switch C<-Ud_flock> to the F<Configure> program when you configure @@ -1527,11 +1656,13 @@ fork(), great care has gone into making it extremely efficient (for example, using copy-on-write technology on data pages), making it the dominant paradigm for multitasking over the last few decades. -Note: unflushed buffers remain unflushed in both processes, which means -you may need to set C<$|> ($AUTOFLUSH in English) or call the C<autoflush()> -method of C<IO::Handle> to avoid duplicate output. +Beginning with v5.6.0, Perl will attempt to flush all files opened for +output before forking the child process, but this may not be supported +on some platforms (see L<perlport>). To be safe, you may need to set +C<$|> ($AUTOFLUSH in English) or call the C<autoflush()> method of +C<IO::Handle> on any open handles in order to avoid duplicate output. -If you C<fork()> without ever waiting on your children, you will +If you C<fork> without ever waiting on your children, you will accumulate zombies. On some systems, you can avoid this by setting C<$SIG{CHLD}> to C<"IGNORE">. 
See also L<perlipc> for more examples of forking and reaping moribund children. @@ -1539,12 +1670,12 @@ forking and reaping moribund children. Note that if your forked child inherits system file descriptors like STDIN and STDOUT that are actually connected by a pipe or socket, even if you exit, then the remote server (such as, say, a CGI script or a -backgrounded job launced from a remote shell) won't think you're done. +backgrounded job launched from a remote shell) won't think you're done. You should reopen those to F</dev/null> if it's any issue. =item format -Declare a picture format for use by the C<write()> function. For +Declare a picture format for use by the C<write> function. For example: format Something = @@ -1565,18 +1696,18 @@ This is an internal function used by C<format>s, though you may call it, too. It formats (see L<perlform>) a list of values according to the contents of PICTURE, placing the output into the format output accumulator, C<$^A> (or C<$ACCUMULATOR> in English). -Eventually, when a C<write()> is done, the contents of +Eventually, when a C<write> is done, the contents of C<$^A> are written to some filehandle, but you could also read C<$^A> yourself and then set C<$^A> back to C<"">. Note that a format typically -does one C<formline()> per line of form, but the C<formline()> function itself +does one C<formline> per line of form, but the C<formline> function itself doesn't care how many newlines are embedded in the PICTURE. This means that the C<~> and C<~~> tokens will treat the entire PICTURE as a single line. You may therefore need to use multiple formlines to implement a single record format, just like the format compiler. -Be careful if you put double quotes around the picture, because an "C<@>" +Be careful if you put double quotes around the picture, because an C<@> character may be taken to mean the beginning of an array name. -C<formline()> always returns TRUE. See L<perlform> for other examples. 
+C<formline> always returns true. See L<perlform> for other examples. =item getc FILEHANDLE @@ -1609,7 +1740,7 @@ something more like: Determination of whether $BSD_STYLE should be set is left as an exercise to the reader. -The C<POSIX::getattr()> function can do this more portably on +The C<POSIX::getattr> function can do this more portably on systems purporting POSIX compliance. See also the C<Term::ReadKey> module from your nearest CPAN site; details on CPAN can be found on L<perlmodlib/CPAN>. @@ -1618,12 +1749,12 @@ L<perlmodlib/CPAN>. Implements the C library function of the same name, which on most systems returns the current login from F</etc/utmp>, if any. If null, -use C<getpwuid()>. +use C<getpwuid>. $login = getlogin || getpwuid($<) || "Kilroy"; -Do not consider C<getlogin()> for authentication: it is not as -secure as C<getpwuid()>. +Do not consider C<getlogin> for authentication: it is not as +secure as C<getpwuid>. =item getpeername SOCKET @@ -1631,7 +1762,7 @@ Returns the packed sockaddr address of other end of the SOCKET connection. use Socket; $hersockaddr = getpeername(SOCK); - ($port, $iaddr) = unpack_sockaddr_in($hersockaddr); + ($port, $iaddr) = sockaddr_in($hersockaddr); $herhostname = gethostbyaddr($iaddr, AF_INET); $herstraddr = inet_ntoa($iaddr); @@ -1641,7 +1772,7 @@ Returns the current process group for the specified PID. Use a PID of C<0> to get the current process group for the current process. Will raise an exception if used on a machine that doesn't implement getpgrp(2). If PID is omitted, returns process -group of current process. Note that the POSIX version of C<getpgrp()> +group of current process. Note that the POSIX version of C<getpgrp> does not accept a PID argument, so only C<PID==0> is truly portable. =item getppid @@ -1728,6 +1859,14 @@ various get routines are as follows: (If the entry doesn't exist you get a null list.) 
+The exact meaning of the $gcos field varies but it usually contains +the real name of the user (as opposed to the login name) and other +information pertaining to the user. Beware, however, that in many +systems users are able to change this information, so it +cannot be trusted and the $gcos field is therefore tainted (see +L<perlsec>). The $passwd and $shell fields (the user's encrypted password and +login shell) are also tainted, for the same reason. + In scalar context, you get the name, unless the function was a lookup by name, in which case you get the other thing, whatever it is. (If the entry doesn't exist you get the undefined value.) For example: @@ -1740,28 +1879,27 @@ lookup by name, in which case you get the other thing, whatever it is. $name = getgrent(); #etc. -In I<getpw*()> the fields C<$quota>, C<$comment>, and C<$expire> are -special cases in the sense that in many systems they are unsupported. -If the C<$quota> is unsupported, it is an empty scalar. If it is -supported, it usually encodes the disk quota. If the C<$comment> -field is unsupported, it is an empty scalar. If it is supported it -usually encodes some administrative comment about the user. In some -systems the $quota field may be C<$change> or C<$age>, fields that have -to do with password aging. In some systems the C<$comment> field may -be C<$class>. The C<$expire> field, if present, encodes the expiration -period of the account or the password. For the availability and the -exact meaning of these fields in your system, please consult your -getpwnam(3) documentation and your F<pwd.h> file. You can also find -out from within Perl what your C<$quota> and C<$comment> fields mean -and whether you have the C<$expire> field by using the C<Config> module -and the values C<d_pwquota>, C<d_pwage>, C<d_pwchange>, C<d_pwcomment>, -and C<d_pwexpire>. 
Shadow password files are only supported if your -vendor has implemented them in the intuitive fashion that calling the -regular C library routines gets the shadow versions if you're running -under privilege. Those that incorrectly implement a separate library -call are not supported. - -The C<$members> value returned by I<getgr*()> is a space separated list of +In I<getpw*()> the fields $quota, $comment, and $expire are special +cases in the sense that in many systems they are unsupported. If the +$quota is unsupported, it is an empty scalar. If it is supported, it +usually encodes the disk quota. If the $comment field is unsupported, +it is an empty scalar. If it is supported it usually encodes some +administrative comment about the user. In some systems the $quota +field may be $change or $age, fields that have to do with password +aging. In some systems the $comment field may be $class. The $expire +field, if present, encodes the expiration period of the account or the +password. For the availability and the exact meaning of these fields +in your system, please consult your getpwnam(3) documentation and your +F<pwd.h> file. You can also find out from within Perl what your +$quota and $comment fields mean and whether you have the $expire field +by using the C<Config> module and the values C<d_pwquota>, C<d_pwage>, +C<d_pwchange>, C<d_pwcomment>, and C<d_pwexpire>. Shadow password +files are only supported if your vendor has implemented them in the +intuitive fashion that calling the regular C library routines gets the +shadow versions if you're running under privilege. Those that +incorrectly implement a separate library call are not supported. + +The $members value returned by I<getgr*()> is a space separated list of the login names of the members of the group. 
For the I<gethost*()> functions, if the C<h_errno> variable is supported in @@ -1780,29 +1918,36 @@ The Socket library makes this slightly easier: $name = gethostbyaddr($iaddr, AF_INET); # or going the other way - $straddr = inet_ntoa($iaddr"); + $straddr = inet_ntoa($iaddr); -If you get tired of remembering which element of the return list contains -which return value, by-name interfaces are also provided in modules: -C<File::stat>, C<Net::hostent>, C<Net::netent>, C<Net::protoent>, C<Net::servent>, -C<Time::gmtime>, C<Time::localtime>, and C<User::grent>. These override the -normal built-in, replacing them with versions that return objects with -the appropriate names for each field. For example: +If you get tired of remembering which element of the return list +contains which return value, by-name interfaces are provided +in standard modules: C<File::stat>, C<Net::hostent>, C<Net::netent>, +C<Net::protoent>, C<Net::servent>, C<Time::gmtime>, C<Time::localtime>, +and C<User::grent>. These override the normal built-ins, supplying +versions that return objects with the appropriate names +for each field. For example: use File::stat; use User::pwent; $is_his = (stat($filename)->uid == pwent($whoever)->uid); Even though it looks like they're the same method calls (uid), -they aren't, because a C<File::stat> object is different from a C<User::pwent> object. +they aren't, because a C<File::stat> object is different from +a C<User::pwent> object. =item getsockname SOCKET -Returns the packed sockaddr address of this end of the SOCKET connection. +Returns the packed sockaddr address of this end of the SOCKET connection, +in case you don't know the address because you have several different +IPs that the connection might have come in on. 
use Socket; $mysockaddr = getsockname(SOCK); - ($port, $myaddr) = unpack_sockaddr_in($mysockaddr); + ($port, $myaddr) = sockaddr_in($mysockaddr); + printf "Connect to %s [%s]\n", + scalar gethostbyaddr($myaddr, AF_INET), + inet_ntoa($myaddr); =item getsockopt SOCKET,LEVEL,OPTNAME @@ -1814,35 +1959,51 @@ Returns the socket option requested, or undef if there is an error. Returns the value of EXPR with filename expansions such as the standard Unix shell F</bin/csh> would do. This is the internal function -implementing the C<E<lt>*.cE<gt>> operator, but you can use it directly. -If EXPR is omitted, C<$_> is used. The C<E<lt>*.cE<gt>> operator is +implementing the C<< <*.c> >> operator, but you can use it directly. +If EXPR is omitted, C<$_> is used. The C<< <*.c> >> operator is discussed in more detail in L<perlop/"I/O Operators">. +Beginning with v5.6.0, this operator is implemented using the standard +C<File::Glob> extension. See L<File::Glob> for details. + =item gmtime EXPR -Converts a time as returned by the time function to a 9-element array +Converts a time as returned by the time function to an 8-element list with the time localized for the standard Greenwich time zone. Typically used as follows: - # 0 1 2 3 4 5 6 7 8 - ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = + # 0 1 2 3 4 5 6 7 + ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime(time); -All array elements are numeric, and come straight out of a struct tm. -In particular this means that C<$mon> has the range C<0..11> and C<$wday> -has the range C<0..6> with sunday as day C<0>. Also, C<$year> is the -number of years since 1900, that is, C<$year> is C<123> in year 2023, -I<not> simply the last two digits of the year. If you assume it is, -then you create non-Y2K-compliant programs--and you wouldn't want to do -that, would you? +All list elements are numeric, and come straight out of the C `struct +tm'. $sec, $min, and $hour are the seconds, minutes, and hours of the +specified time. 
$mday is the day of the month, and $mon is the month +itself, in the range C<0..11> with 0 indicating January and 11 +indicating December. $year is the number of years since 1900. That +is, $year is C<123> in year 2023. $wday is the day of the week, with +0 indicating Sunday and 3 indicating Wednesday. $yday is the day of +the year, in the range C<1..365> (or C<1..366> in leap years.) + +Note that the $year element is I<not> simply the last two digits of +the year. If you assume it is, then you create non-Y2K-compliant +programs--and you wouldn't want to do that, would you? + +The proper way to get a complete 4-digit year is simply: + + $year += 1900; + +And to get the last two digits of the year (e.g., '01' in 2001) do: + + $year = sprintf("%02d", $year % 100); -If EXPR is omitted, does C<gmtime(time())>. +If EXPR is omitted, C<gmtime()> uses the current time (C<gmtime(time)>). -In scalar context, returns the ctime(3) value: +In scalar context, C<gmtime()> returns the ctime(3) value: $now_string = gmtime; # e.g., "Thu Oct 13 04:54:34 1994" -Also see the C<timegm()> function provided by the C<Time::Local> module, +Also see the C<timegm> function provided by the C<Time::Local> module, and the strftime(3) function available via the POSIX module. This scalar value is B<not> locale dependent (see L<perllocale>), but @@ -1869,10 +2030,10 @@ The C<goto-LABEL> form finds the statement labeled with LABEL and resumes execution there. It may not be used to go into any construct that requires initialization, such as a subroutine or a C<foreach> loop. It also can't be used to go into a construct that is optimized away, -or to get out of a block or subroutine given to C<sort()>. +or to get out of a block or subroutine given to C<sort>. It can be used to go almost anywhere else within the dynamic scope, including out of subroutines, but it's usually better to use some other -construct such as C<last> or C<die()>. 
The author of Perl has never felt the +construct such as C<last> or C<die>. The author of Perl has never felt the need to use this form of C<goto> (in Perl, that is--C is another matter). The C<goto-EXPR> form expects a label name, whose scope will be resolved @@ -1881,13 +2042,20 @@ necessarily recommended if you're optimizing for maintainability: goto ("FOO", "BAR", "GLARCH")[$i]; -The C<goto-&NAME> form is highly magical, and substitutes a call to the -named subroutine for the currently running subroutine. This is used by -C<AUTOLOAD> subroutines that wish to load another subroutine and then -pretend that the other subroutine had been called in the first place -(except that any modifications to C<@_> in the current subroutine are -propagated to the other subroutine.) After the C<goto>, not even C<caller()> -will be able to tell that this routine was called first. +The C<goto-&NAME> form is quite different from the other forms of C<goto>. +In fact, it isn't a goto in the normal sense at all, and doesn't have +the stigma associated with other gotos. Instead, it +substitutes a call to the named subroutine for the currently running +subroutine. This is used by C<AUTOLOAD> subroutines that wish to load +another subroutine and then pretend that the other subroutine had been +called in the first place (except that any modifications to C<@_> +in the current subroutine are propagated to the other subroutine.) +After the C<goto>, not even C<caller> will be able to tell that this +routine was called first. + +NAME needn't be the name of a subroutine; it can be a scalar variable +containing a code reference, or a block which evaluates to a code +reference. =item grep BLOCK LIST @@ -1898,8 +2066,8 @@ relatives. In particular, it is not limited to using regular expressions. Evaluates the BLOCK or EXPR for each element of LIST (locally setting C<$_> to each element) and returns the list value consisting of those -elements for which the expression evaluated to TRUE. 
In scalar -context, returns the number of times the expression was TRUE. +elements for which the expression evaluated to true. In scalar +context, returns the number of times the expression was true. @foo = grep(!/^#/, @bar); # weed out comments @@ -1912,11 +2080,11 @@ be used to modify the elements of the array. While this is useful and supported, it can cause bizarre results if the LIST is not a named array. Similarly, grep returns aliases into the original list, much as a for loop's index variable aliases the list elements. That is, modifying an -element of a list returned by grep (for example, in a C<foreach>, C<map()> -or another C<grep()>) actually modifies the element in the original list. +element of a list returned by grep (for example, in a C<foreach>, C<map> +or another C<grep>) actually modifies the element in the original list. This is usually something to be avoided when writing clear code. -See also L</map> for an array composed of the results of the BLOCK or EXPR. +See also L</map> for a list composed of the results of the BLOCK or EXPR. =item hex EXPR @@ -1929,11 +2097,14 @@ L</oct>.) If EXPR is omitted, uses C<$_>. print hex '0xAf'; # prints '175' print hex 'aF'; # same +Hex strings may only represent integers. Strings that would cause +integer overflow trigger a warning. + =item import -There is no builtin C<import()> function. It is just an ordinary +There is no builtin C<import> function. It is just an ordinary method (subroutine) defined (or inherited) by modules that wish to export -names to another module. The C<use()> function calls the C<import()> method +names to another module. The C<use> function calls the C<import> method for the package used. See also L</use()>, L<perlmod>, and L<Exporter>. =item index STR,SUBSTR,POSITION @@ -1958,7 +2129,7 @@ towards C<0>, and two because machine representations of floating point numbers can sometimes produce counterintuitive results. 
For example, C<int(-6.725/0.025)> produces -268 rather than the correct -269; that's because it's really more like -268.99999999999994315658 instead. Usually, -the C<sprintf()>, C<printf()>, or the C<POSIX::floor> and C<POSIX::ceil> +the C<sprintf>, C<printf>, or the C<POSIX::floor> and C<POSIX::ceil> functions will serve you better than will int(). =item ioctl FILEHANDLE,FUNCTION,SCALAR @@ -1969,37 +2140,25 @@ Implements the ioctl(2) function. You'll probably first have to say to get the correct function definitions. If F<ioctl.ph> doesn't exist or doesn't have the correct definitions you'll have to roll your -own, based on your C header files such as F<E<lt>sys/ioctl.hE<gt>>. +own, based on your C header files such as F<< <sys/ioctl.h> >>. (There is a Perl script called B<h2ph> that comes with the Perl kit that may help you in this, but it's nontrivial.) SCALAR will be read and/or written depending on the FUNCTION--a pointer to the string value of SCALAR -will be passed as the third argument of the actual C<ioctl()> call. (If SCALAR +will be passed as the third argument of the actual C<ioctl> call. (If SCALAR has no string value but does have a numeric value, that value will be passed rather than a pointer to the string value. To guarantee this to be -TRUE, add a C<0> to the scalar before using it.) The C<pack()> and C<unpack()> -functions are useful for manipulating the values of structures used by -C<ioctl()>. The following example sets the erase character to DEL. - - require 'ioctl.ph'; - $getp = &TIOCGETP; - die "NO TIOCGETP" if $@ || !$getp; - $sgttyb_t = "ccccs"; # 4 chars and a short - if (ioctl(STDIN,$getp,$sgttyb)) { - @ary = unpack($sgttyb_t,$sgttyb); - $ary[2] = 127; - $sgttyb = pack($sgttyb_t,@ary); - ioctl(STDIN,&TIOCSETP,$sgttyb) - || die "Can't ioctl: $!"; - } +true, add a C<0> to the scalar before using it.) The C<pack> and C<unpack> +functions may be needed to manipulate the values of structures used by +C<ioctl>. 
-The return value of C<ioctl()> (and C<fcntl()>) is as follows: +The return value of C<ioctl> (and C<fcntl>) is as follows: if OS returns: then Perl returns: -1 undefined value 0 string "0 but true" anything else that number -Thus Perl returns TRUE on success and FALSE on failure, yet you can +Thus Perl returns true on success and false on failure, yet you can still easily determine the actual value returned by the operating system: @@ -2009,6 +2168,18 @@ system: The special string "C<0> but true" is exempt from B<-w> complaints about improper numeric conversions. +Here's an example of setting a filehandle named C<REMOTE> to be +non-blocking at the system level. You'll have to negotiate C<$|> +on your own, though. + + use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); + + $flags = fcntl(REMOTE, F_GETFL, 0) + or die "Can't get flags for the socket: $!\n"; + + $flags = fcntl(REMOTE, F_SETFL, $flags | O_NONBLOCK) + or die "Can't set flags for the socket: $!\n"; + =item join EXPR,LIST Joins the separate strings of LIST into a single string with fields @@ -2016,15 +2187,16 @@ separated by the value of EXPR, and returns that new string. Example: $rec = join(':', $login,$passwd,$uid,$gid,$gcos,$home,$shell); -See L</split>. +Beware that unlike C<split>, C<join> doesn't take a pattern as its +first argument. Compare L</split>. =item keys HASH -Returns a list consisting of all the keys of the named hash. (In a +Returns a list consisting of all the keys of the named hash. (In scalar context, returns the number of keys.) The keys are returned in an apparently random order. The actual random order is subject to change in future versions of perl, but it is guaranteed to be the same -order as either the C<values()> or C<each()> function produces (given +order as either the C<values> or C<each> function produces (given that the hash has not been modified). As a side effect, it resets HASH's iterator. 
@@ -2032,7 +2204,7 @@ Here is yet another way to print your environment: @keys = keys %ENV; @values = values %ENV; - while ($#keys >= 0) { + while (@keys) { print pop(@keys), '=', pop(@values), "\n"; } @@ -2042,14 +2214,14 @@ or how about sorted by key: print $key, '=', $ENV{$key}, "\n"; } -To sort a hash by value, you'll need to use a C<sort()> function. +To sort a hash by value, you'll need to use a C<sort> function. Here's a descending numeric sort of a hash by its values: foreach $key (sort { $hash{$b} <=> $hash{$a} } keys %hash) { printf "%4d %s\n", $hash{$key}, $key; } -As an lvalue C<keys()> allows you to increase the number of hash buckets +As an lvalue C<keys> allows you to increase the number of hash buckets allocated for the given hash. This can gain you a measure of efficiency if you know the hash is going to get big. (This is similar to pre-extending an array by assigning a larger number to $#array.) If you say @@ -2061,21 +2233,26 @@ in fact, since it rounds up to the next power of two. These buckets will be retained even if you do C<%hash = ()>, use C<undef %hash> if you want to free the storage while C<%hash> is still in scope. You can't shrink the number of buckets allocated for the hash using -C<keys()> in this way (but you needn't worry about doing this by accident, +C<keys> in this way (but you needn't worry about doing this by accident, as trying has no effect). -See also C<each()>, C<values()> and C<sort()>. +See also C<each>, C<values> and C<sort>. -=item kill LIST +=item kill SIGNAL, LIST -Sends a signal to a list of processes. The first element of -the list must be the signal to send. Returns the number of -processes successfully signaled. +Sends a signal to a list of processes. Returns the number of +processes successfully signaled (which is not necessarily the +same as the number actually killed). 
$cnt = kill 1, $child1, $child2; kill 9, @goners; -Unlike in the shell, in Perl if the I<SIGNAL> is negative, it kills +If SIGNAL is zero, no signal is sent to the process. This is a +useful way to check that the process is alive and hasn't changed +its UID. See L<perlport> for notes on the portability of this +construct. + +Unlike in the shell, if SIGNAL is negative, it kills process groups instead of processes. (On System V, a negative I<PROCESS> number will also kill process groups, but that's not portable.) That means you usually want to use positive not negative signals. You may also @@ -2099,6 +2276,10 @@ C<last> cannot be used to exit a block which returns a value such as C<eval {}>, C<sub {}> or C<do {}>, and should not be used to exit a grep() or map() operation. +Note that a block by itself is semantically identical to a loop +that executes once. Thus C<last> can be used to effect an early +exit out of such a block. + See also L</continue> for an illustration of how C<last>, C<next>, and C<redo> work. @@ -2108,7 +2289,8 @@ C<redo> work. Returns an lowercased version of EXPR. This is the internal function implementing the C<\L> escape in double-quoted strings. -Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale>. +Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale> +and L<utf8>. If EXPR is omitted, uses C<$_>. @@ -2133,17 +2315,17 @@ For that, use C<scalar @array> and C<scalar keys %hash> respectively. =item link OLDFILE,NEWFILE -Creates a new filename linked to the old filename. Returns TRUE for -success, FALSE otherwise. +Creates a new filename linked to the old filename. Returns true for +success, false otherwise. =item listen SOCKET,QUEUESIZE -Does the same thing that the listen system call does. Returns TRUE if -it succeeded, FALSE otherwise. See the example in L<perlipc/"Sockets: Client/Server Communication">. +Does the same thing that the listen system call does. 
Returns true if +it succeeded, false otherwise. See the example in L<perlipc/"Sockets: Client/Server Communication">. =item local EXPR -You really probably want to be using C<my()> instead, because C<local()> isn't +You really probably want to be using C<my> instead, because C<local> isn't what most people think of as "local". See L<perlsub/"Private Variables via my()"> for details. @@ -2154,7 +2336,7 @@ for details, including issues with tied arrays and hashes. =item localtime EXPR -Converts a time as returned by the time function to a 9-element array +Converts a time as returned by the time function to a 9-element list with the time analyzed for the local time zone. Typically used as follows: @@ -2162,26 +2344,43 @@ follows: ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); -All array elements are numeric, and come straight out of a struct tm. -In particular this means that C<$mon> has the range C<0..11> and C<$wday> -has the range C<0..6> with sunday as day C<0>. Also, C<$year> is the -number of years since 1900, that is, C<$year> is C<123> in year 2023, -and I<not> simply the last two digits of the year. If you assume it is, -then you create non-Y2K-compliant programs--and you wouldn't want to do -that, would you? +All list elements are numeric, and come straight out of the C `struct +tm'. $sec, $min, and $hour are the seconds, minutes, and hours of the +specified time. $mday is the day of the month, and $mon is the month +itself, in the range C<0..11> with 0 indicating January and 11 +indicating December. $year is the number of years since 1900. That +is, $year is C<123> in year 2023. $wday is the day of the week, with +0 indicating Sunday and 3 indicating Wednesday. $yday is the day of +the year, in the range C<1..365> (or C<1..366> in leap years.) $isdst +is true if the specified time occurs during daylight savings time, +false otherwise. + +Note that the $year element is I<not> simply the last two digits of +the year. 
If you assume it is, then you create non-Y2K-compliant +programs--and you wouldn't want to do that, would you? + +The proper way to get a complete 4-digit year is simply: + + $year += 1900; -If EXPR is omitted, uses the current time (C<localtime(time)>). +And to get the last two digits of the year (e.g., '01' in 2001) do: -In scalar context, returns the ctime(3) value: + $year = sprintf("%02d", $year % 100); + +If EXPR is omitted, C<localtime()> uses the current time (C<localtime(time)>). + +In scalar context, C<localtime()> returns the ctime(3) value: $now_string = localtime; # e.g., "Thu Oct 13 04:54:34 1994" This scalar value is B<not> locale dependent, see L<perllocale>, but -instead a Perl builtin. Also see the C<Time::Local> module, and the -strftime(3) and mktime(3) function available via the POSIX module. To -get somewhat similar but locale dependent date strings, set up your -locale environment variables appropriately (please see L<perllocale>) -and try for example: +instead a Perl builtin. Also see the C<Time::Local> module +(to convert the seconds, minutes, hours, ... back to seconds since the +stroke of midnight the 1st of January 1970, the value returned by +time()), and the strftime(3) and mktime(3) functions available via the +POSIX module. To get somewhat similar but locale dependent date +strings, set up your locale environment variables appropriately +(please see L<perllocale>) and try for example: use POSIX qw(strftime); $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime; @@ -2189,13 +2388,24 @@ and try for example: Note that the C<%a> and C<%b>, the short forms of the day of the week and the month of the year, may not necessarily be three characters wide. +=item lock + + lock I<THING> + +This function places an advisory lock on a variable, subroutine, +or referenced object contained in I<THING> until the lock goes out +of scope. 
This is a built-in function only if your version of Perl +was built with threading enabled, and if you've said C<use Threads>. +Otherwise a user-defined function by this name will be called. See +L<Thread>. + =item log EXPR =item log Returns the natural logarithm (base I<e>) of EXPR. If EXPR is omitted, returns log of C<$_>. To get the log of another base, use basic algebra: -The base-N log of a number is is equal to the natural log of that number +The base-N log of a number is equal to the natural log of that number divided by the natural log of N. For example: sub log10 { @@ -2211,10 +2421,10 @@ See also L</exp> for the inverse operation. =item lstat -Does the same thing as the C<stat()> function (including setting the +Does the same thing as the C<stat> function (including setting the special C<_> filehandle) but stats a symbolic link instead of the file the symbolic link points to. If symbolic links are unimplemented on -your system, a normal C<stat()> is done. +your system, a normal C<stat> is done. If EXPR is omitted, stats C<$_>. @@ -2226,12 +2436,12 @@ The match operator. See L<perlop>. =item map EXPR,LIST -Evaluates the BLOCK or EXPR for each element of LIST (locally setting C<$_> to each -element) and returns the list value composed of the results of each such -evaluation. Evaluates BLOCK or EXPR in a list context, so each element of LIST -may produce zero, one, or more elements in the returned value. - -In scalar context, returns the total number of elements so generated. +Evaluates the BLOCK or EXPR for each element of LIST (locally setting +C<$_> to each element) and returns the list value composed of the +results of each such evaluation. In scalar context, returns the +total number of elements so generated. Evaluates BLOCK or EXPR in +list context, so each element of LIST may produce zero, one, or +more elements in the returned value. 
@chars = map(chr, @nums); @@ -2253,18 +2463,21 @@ Using a regular C<foreach> loop for this purpose would be clearer in most cases. See also L</grep> for an array composed of those items of the original list for which the BLOCK or EXPR evaluates to true. -=item mkdir FILENAME,MODE +=item mkdir FILENAME,MASK + +=item mkdir FILENAME Creates the directory specified by FILENAME, with permissions -specified by MODE (as modified by C<umask>). If it succeeds it -returns TRUE, otherwise it returns FALSE and sets C<$!> (errno). +specified by MASK (as modified by C<umask>). If it succeeds it +returns true, otherwise it returns false and sets C<$!> (errno). +If omitted, MASK defaults to 0777. -In general, it is better to create directories with permissive MODEs, +In general, it is better to create directories with permissive MASK, and let the user modify that with their C<umask>, than it is to supply -a restrictive MODE and give the user no way to be more permissive. +a restrictive MASK and give the user no way to be more permissive. The exceptions to this rule are when the file or directory should be kept private (mail files, for instance). The perlfunc(1) entry on -C<umask> discusses the choice of MODE in more detail. +C<umask> discusses the choice of MASK in more detail. =item msgctl ID,CMD,ARG @@ -2274,37 +2487,42 @@ Calls the System V IPC function msgctl(2). You'll probably have to say first to get the correct constant definitions. If CMD is C<IPC_STAT>, then ARG must be a variable which will hold the returned C<msqid_ds> -structure. Returns like C<ioctl()>: the undefined value for error, "C<0> but -true" for zero, or the actual return value otherwise. See also -C<IPC::SysV> and C<IPC::Semaphore::Msg> documentation. +structure. Returns like C<ioctl>: the undefined value for error, +C<"0 but true"> for zero, or the actual return value otherwise. See also +C<IPC::SysV> and C<IPC::Semaphore> documentation. =item msgget KEY,FLAGS Calls the System V IPC function msgget(2). 
Returns the message queue id, or the undefined value if there is an error. See also C<IPC::SysV> -and C<IPC::SysV::Msg> documentation. - -=item msgsnd ID,MSG,FLAGS - -Calls the System V IPC function msgsnd to send the message MSG to the -message queue ID. MSG must begin with the long integer message type, -which may be created with C<pack("l", $type)>. Returns TRUE if -successful, or FALSE if there is an error. See also C<IPC::SysV> -and C<IPC::SysV::Msg> documentation. +and C<IPC::Msg> documentation. =item msgrcv ID,VAR,SIZE,TYPE,FLAGS Calls the System V IPC function msgrcv to receive a message from message queue ID into variable VAR with a maximum message size of -SIZE. Note that if a message is received, the message type will be -the first thing in VAR, and the maximum length of VAR is SIZE plus the -size of the message type. Returns TRUE if successful, or FALSE if -there is an error. See also C<IPC::SysV> and C<IPC::SysV::Msg> documentation. +SIZE. Note that when a message is received, the message type as a +native long integer will be the first thing in VAR, followed by the +actual message. This packing may be opened with C<unpack("l! a*")>. +Taints the variable. Returns true if successful, or false if there is +an error. See also C<IPC::SysV> and C<IPC::SysV::Msg> documentation. + +=item msgsnd ID,MSG,FLAGS + +Calls the System V IPC function msgsnd to send the message MSG to the +message queue ID. MSG must begin with the native long integer message +type, and be followed by the length of the actual message, and finally +the message itself. This kind of packing can be achieved with +C<pack("l! a*", $type, $message)>. Returns true if successful, +or false if there is an error. See also C<IPC::SysV> +and C<IPC::SysV::Msg> documentation. =item my EXPR -A C<my()> declares the listed variables to be local (lexically) to the -enclosing block, file, or C<eval()>. 
If +=item my EXPR : ATTRIBUTES + +A C<my> declares the listed variables to be local (lexically) to the +enclosing block, file, or C<eval>. If more than one value is listed, the list must be placed in parentheses. See L<perlsub/"Private Variables via my()"> for details. @@ -2328,6 +2546,9 @@ C<next> cannot be used to exit a block which returns a value such as C<eval {}>, C<sub {}> or C<do {}>, and should not be used to exit a grep() or map() operation. +Note that a block by itself is semantically identical to a loop +that executes once. Thus C<next> will exit such a block early. + See also L</continue> for an illustration of how C<last>, C<next>, and C<redo> work. @@ -2347,10 +2568,18 @@ hex in the standard Perl or C notation: $val = oct($val) if $val =~ /^0/; -If EXPR is omitted, uses C<$_>. This function is commonly used when -a string such as C<644> needs to be converted into a file mode, for -example. (Although perl will automatically convert strings into -numbers as needed, this automatic conversion assumes base 10.) +If EXPR is omitted, uses C<$_>. To go the other way (produce a number +in octal), use sprintf() or printf(): + + $perms = (stat("filename"))[2] & 07777; + $oct_perms = sprintf "%lo", $perms; + +The oct() function is commonly used when a string such as C<644> needs +to be converted into a file mode, for example. (Although perl will +automatically convert strings into numbers as needed, this automatic +conversion assumes base 10.) + +=item open FILEHANDLE,MODE,LIST =item open FILEHANDLE,EXPR @@ -2358,56 +2587,75 @@ numbers as needed, this automatic conversion assumes base 10.) Opens the file whose filename is given by EXPR, and associates it with FILEHANDLE. If FILEHANDLE is an expression, its value is used as the -name of the real filehandle wanted. If EXPR is omitted, the scalar +name of the real filehandle wanted. (This is considered a symbolic +reference, so C<use strict 'refs'> should I<not> be in effect.) 
+ +If EXPR is omitted, the scalar variable of the same name as the FILEHANDLE contains the filename. -(Note that lexical variables--those declared with C<my()>--will not work -for this purpose; so if you're using C<my()>, specify EXPR in your call +(Note that lexical variables--those declared with C<my>--will not work +for this purpose; so if you're using C<my>, specify EXPR in your call to open.) See L<perlopentut> for a kinder, gentler explanation of opening files. -If the filename begins with C<'E<lt>'> or nothing, the file is opened for input. -If the filename begins with C<'E<gt>'>, the file is truncated and opened for -output, being created if necessary. If the filename begins with C<'E<gt>E<gt>'>, +If MODE is C<< '<' >> or nothing, the file is opened for input. +If MODE is C<< '>' >>, the file is truncated and opened for +output, being created if necessary. If MODE is C<<< '>>' >>>, the file is opened for appending, again being created if necessary. -You can put a C<'+'> in front of the C<'E<gt>'> or C<'E<lt>'> to indicate that -you want both read and write access to the file; thus C<'+E<lt>'> is almost -always preferred for read/write updates--the C<'+E<gt>'> mode would clobber the +You can put a C<'+'> in front of the C<< '>' >> or C<< '<' >> to indicate that +you want both read and write access to the file; thus C<< '+<' >> is almost +always preferred for read/write updates--the C<< '+>' >> mode would clobber the file first. You can't usually use either read-write mode for updating textfiles, since they have variable length records. See the B<-i> switch in L<perlrun> for a better approach. The file is created with permissions of C<0666> modified by the process' C<umask> value. -The prefix and the filename may be separated with spaces. -These various prefixes correspond to the fopen(3) modes of C<'r'>, C<'r+'>, C<'w'>, -C<'w+'>, C<'a'>, and C<'a+'>. 
+These various prefixes correspond to the fopen(3) modes of C<'r'>, C<'r+'>, +C<'w'>, C<'w+'>, C<'a'>, and C<'a+'>. + +In the 2-arguments (and 1-argument) form of the call the mode and +filename should be concatenated (in this order), possibly separated by +spaces. It is possible to omit the mode if the mode is C<< '<' >>. If the filename begins with C<'|'>, the filename is interpreted as a command to which output is to be piped, and if the filename ends with a C<'|'>, the filename is interpreted as a command which pipes output to us. See L<perlipc/"Using open() for IPC"> -for more examples of this. (You are not allowed to C<open()> to a command +for more examples of this. (You are not allowed to C<open> to a command +that pipes both in I<and> out, but see L<IPC::Open2>, L<IPC::Open3>, +and L<perlipc/"Bidirectional Communication with Another Process"> +for alternatives.) + +If MODE is C<'|-'>, the filename is interpreted as a +command to which output is to be piped, and if MODE is +C<'-|'>, the filename is interpreted as a command which pipes output to +us. In the 2-arguments (and 1-argument) form one should replace dash +(C<'-'>) with the command. See L<perlipc/"Using open() for IPC"> +for more examples of this. (You are not allowed to C<open> to a command that pipes both in I<and> out, but see L<IPC::Open2>, L<IPC::Open3>, and L<perlipc/"Bidirectional Communication"> for alternatives.) -Opening C<'-'> opens STDIN and opening C<'E<gt>-'> opens STDOUT. Open returns -nonzero upon success, the undefined value otherwise. If the C<open()> +In the 2-arguments (and 1-argument) form opening C<'-'> opens STDIN +and opening C<< '>-' >> opens STDOUT. + +Open returns +nonzero upon success, the undefined value otherwise. If the C<open> involved a pipe, the return value happens to be the pid of the subprocess. 
If you're unfortunate enough to be running Perl on a system that distinguishes between text files and binary files (modern operating systems don't care), then you should check out L</binmode> for tips for -dealing with this. The key distinction between systems that need C<binmode()> +dealing with this. The key distinction between systems that need C<binmode> and those that don't is their text file formats. Systems like Unix, MacOS, and Plan9, which delimit lines with a single character, and which encode that -character in C as C<"\n">, do not need C<binmode()>. The rest need it. +character in C as C<"\n">, do not need C<binmode>. The rest need it. When opening a file, it's usually a bad idea to continue normal execution -if the request failed, so C<open()> is frequently used in connection with -C<die()>. Even if C<die()> won't do what you want (say, in a CGI script, +if the request failed, so C<open> is frequently used in connection with +C<die>. Even if C<die> won't do what you want (say, in a CGI script, where you want to make a nicely formatted error message (but there are modules that can help with that problem)) you should always check -the return value from opening a file. The infrequent exception is when +the return value from opening a file. The infrequent exception is when working with an unopened filehandle is actually what you want to do. Examples: @@ -2416,16 +2664,22 @@ Examples: open ARTICLE or die "Can't find article $ARTICLE: $!\n"; while (<ARTICLE>) {... 
- open(LOG, '>>/usr/spool/news/twitlog'); # (log is reserved) + open(LOG, '>>/usr/spool/news/twitlog'); # (log is reserved) # if the open fails, output is discarded - open(DBASE, '+<dbase.mine') # open for update + open(DBASE, '+<', 'dbase.mine') # open for update + or die "Can't open 'dbase.mine' for update: $!"; + + open(DBASE, '+<dbase.mine') # ditto or die "Can't open 'dbase.mine' for update: $!"; - open(ARTICLE, "caesar <$article |") # decrypt article + open(ARTICLE, '-|', "caesar <$article") # decrypt article + or die "Can't start caesar: $!"; + + open(ARTICLE, "caesar <$article |") # ditto or die "Can't start caesar: $!"; - open(EXTRACT, "|sort >/tmp/Tmp$$") # $$ is our process id + open(EXTRACT, "|sort >/tmp/Tmp$$") # $$ is our process id or die "Can't start sort: $!"; # process argument list of files along with any includes @@ -2453,13 +2707,15 @@ Examples: } You may also, in the Bourne shell tradition, specify an EXPR beginning -with C<'E<gt>&'>, in which case the rest of the string is interpreted as the +with C<< '>&' >>, in which case the rest of the string is interpreted as the name of a filehandle (or file descriptor, if numeric) to be -duped and opened. You may use C<&> after C<E<gt>>, C<E<gt>E<gt>>, C<E<lt>>, C<+E<gt>>, -C<+E<gt>E<gt>>, and C<+E<lt>>. The +duped and opened. You may use C<&> after C<< > >>, C<<< >> >>>, +C<< < >>, C<< +> >>, C<<< +>> >>>, and C<< +< >>. The mode you specify should match the mode of the original filehandle. (Duping a filehandle does not take into account any existing contents of -stdio buffers.) +stdio buffers.) Duping file handles is not yet supported for 3-argument +open(). 
+ Here is a script that saves, redirects, and restores STDOUT and STDERR: @@ -2467,8 +2723,8 @@ STDERR: open(OLDOUT, ">&STDOUT"); open(OLDERR, ">&STDERR"); - open(STDOUT, ">foo.out") || die "Can't redirect stdout"; - open(STDERR, ">&STDOUT") || die "Can't dup stdout"; + open(STDOUT, '>', "foo.out") || die "Can't redirect stdout"; + open(STDERR, ">&STDOUT") || die "Can't dup stdout"; select(STDERR); $| = 1; # make unbuffered select(STDOUT); $| = 1; # make unbuffered @@ -2485,13 +2741,20 @@ STDERR: print STDOUT "stdout 2\n"; print STDERR "stderr 2\n"; -If you specify C<'E<lt>&=N'>, where C<N> is a number, then Perl will do an -equivalent of C's C<fdopen()> of that file descriptor; this is more +If you specify C<< '<&=N' >>, where C<N> is a number, then Perl will do an +equivalent of C's C<fdopen> of that file descriptor; this is more parsimonious of file descriptors. For example: open(FILEHANDLE, "<&=$fd") -If you open a pipe on the command C<'-'>, i.e., either C<'|-'> or C<'-|'>, then +Note that this feature depends on the fdopen() C library function. +On many UNIX systems, fdopen() is known to fail when file descriptors +exceed a certain value, typically 255. If you need more file +descriptors than that, consider rebuilding Perl to use the C<sfio> +library. + +If you open a pipe on the command C<'-'>, i.e., either C<'|-'> or C<'-|'> +with 2-arguments (or 1-argument) form of open(), then there is an implicit fork done, and the return value of open is the pid of the child within the parent process, and C<0> within the child process. (Use C<defined($pid)> to determine whether the open was successful.) @@ -2502,26 +2765,33 @@ the new STDOUT or STDIN. Typically this is used like the normal piped open when you want to exercise more control over just how the pipe command gets executed, such as when you are running setuid, and don't want to have to scan shell commands for metacharacters. 
-The following pairs are more or less equivalent: +The following triples are more or less equivalent: open(FOO, "|tr '[a-z]' '[A-Z]'"); - open(FOO, "|-") || exec 'tr', '[a-z]', '[A-Z]'; + open(FOO, '|-', "tr '[a-z]' '[A-Z]'"); + open(FOO, '|-') || exec 'tr', '[a-z]', '[A-Z]'; open(FOO, "cat -n '$file'|"); - open(FOO, "-|") || exec 'cat', '-n', $file; + open(FOO, '-|', "cat -n '$file'"); + open(FOO, '-|') || exec 'cat', '-n', $file; See L<perlipc/"Safe Pipe Opens"> for more examples of this. -NOTE: On any operation that may do a fork, any unflushed buffers remain -unflushed in both processes, which means you may need to set C<$|> to -avoid duplicate output. On systems that support a close-on-exec flag on -files, the flag will be set for the newly opened file descriptor as -determined by the value of $^F. See L<perlvar/$^F>. +Beginning with v5.6.0, Perl will attempt to flush all files opened for +output before any operation that may do a fork, but this may not be +supported on some platforms (see L<perlport>). To be safe, you may need +to set C<$|> ($AUTOFLUSH in English) or call the C<autoflush()> method +of C<IO::Handle> on any open handles. + +On systems that support a +close-on-exec flag on files, the flag will be set for the newly opened +file descriptor as determined by the value of $^F. See L<perlvar/$^F>. Closing any piped filehandle causes the parent process to wait for the child to finish, and returns the status value in C<$?>. -The filename passed to open will have leading and trailing +The filename passed to 2-argument (or 1-argument) form of open() +will have leading and trailing whitespace deleted, and the normal redirection characters honored. This property, known as "magic open", can often be used to good effect. 
A user could specify a filename of @@ -2530,14 +2800,32 @@ F<"rsh cat file |">, or you could change certain filenames as needed: $filename =~ s/(.*\.gz)\s*$/gzip -dc < $1|/; open(FH, $filename) or die "Can't open $filename: $!"; -However, to open a file with arbitrary weird characters in it, it's -necessary to protect any leading and trailing whitespace: +Use 3-argument form to open a file with arbitrary weird characters in it, + + open(FOO, '<', $file); + +otherwise it's necessary to protect any leading and trailing whitespace: $file =~ s#^(\s)#./$1#; open(FOO, "< $file\0"); -If you want a "real" C C<open()> (see L<open(2)> on your system), then you -should use the C<sysopen()> function, which involves no such magic. This is +(this may not work on some bizarre filesystems). One should +conscientiously choose between the I<magic> and 3-arguments form +of open(): + + open IN, $ARGV[0]; + +will allow the user to specify an argument of the form C<"rsh cat file |">, +but will not work on a filename which happens to have a trailing space, while + + open IN, '<', $ARGV[0]; + +will have exactly the opposite restrictions. + +If you want a "real" C C<open> (see L<open(2)> on your system), then you +should use the C<sysopen> function, which involves no such magic (but +may use subtly different filemodes than Perl open(), which is mapped +to C fopen()). This is another way to protect your filenames from interpretation. For example: use IO::Handle; @@ -2570,21 +2858,67 @@ See L</seek> for some details about mixing reading and writing. =item opendir DIRHANDLE,EXPR -Opens a directory named EXPR for processing by C<readdir()>, C<telldir()>, -C<seekdir()>, C<rewinddir()>, and C<closedir()>. Returns TRUE if successful. +Opens a directory named EXPR for processing by C<readdir>, C<telldir>, +C<seekdir>, C<rewinddir>, and C<closedir>. Returns true if successful. DIRHANDLEs have their own namespace separate from FILEHANDLEs. 
=item ord EXPR =item ord -Returns the numeric ascii value of the first character of EXPR. If +Returns the numeric (ASCII or Unicode) value of the first character of EXPR. If EXPR is omitted, uses C<$_>. For the reverse, see L</chr>. +See L<utf8> for more about Unicode. + +=item our EXPR + +An C<our> declares the listed variables to be valid globals within +the enclosing block, file, or C<eval>. That is, it has the same +scoping rules as a "my" declaration, but does not create a local +variable. If more than one value is listed, the list must be placed +in parentheses. The C<our> declaration has no semantic effect unless +"use strict vars" is in effect, in which case it lets you use the +declared global variable without qualifying it with a package name. +(But only within the lexical scope of the C<our> declaration. In this +it differs from "use vars", which is package scoped.) + +An C<our> declaration declares a global variable that will be visible +across its entire lexical scope, even across package boundaries. The +package in which the variable is entered is determined at the point +of the declaration, not at the point of use. This means the following +behavior holds: + + package Foo; + our $bar; # declares $Foo::bar for rest of lexical scope + $bar = 20; + + package Bar; + print $bar; # prints 20 + +Multiple C<our> declarations in the same lexical scope are allowed +if they are in different packages. If they happened to be in the same +package, Perl will emit warnings if you have asked for them. + + use warnings; + package Foo; + our $bar; # declares $Foo::bar for rest of lexical scope + $bar = 20; + + package Bar; + our $bar = 30; # declares $Bar::bar for rest of lexical scope + print $bar; # prints 30 + + our $bar; # emits warning =item pack TEMPLATE,LIST -Takes an array or list of values and packs it into a binary structure, -returning the string containing the structure. 
The TEMPLATE is a +Takes a LIST of values and converts it into a string using the rules +given by the TEMPLATE. The resulting string is the concatenation of +the converted values. Typically, each converted value looks +like its machine-level representation. For example, on 32-bit machines +a converted integer may be represented by a sequence of 4 bytes. + +The TEMPLATE is a sequence of characters that give the order and type of values, as follows: @@ -2592,22 +2926,23 @@ follows: A An ascii string, will be space padded. Z A null terminated (asciz) string, will be null padded. - b A bit string (ascending bit order, like vec()). - B A bit string (descending bit order). + b A bit string (ascending bit order inside each byte, like vec()). + B A bit string (descending bit order inside each byte). h A hex string (low nybble first). H A hex string (high nybble first). c A signed char value. - C An unsigned char value. + C An unsigned char value. Only does bytes. See U for Unicode. s A signed short value. S An unsigned short value. (This 'short' is _exactly_ 16 bits, which may differ from - what a local C compiler calls 'short'.) + what a local C compiler calls 'short'. If you want + native-length shorts, use the '!' suffix.) i A signed integer value. I An unsigned integer value. - (This 'integer' is _at least_ 32 bits wide. Its exact + (This 'integer' is _at_least_ 32 bits wide. Its exact size depends on what a local C compiler calls 'int', and may even be larger than the 'long' described in the next item.) @@ -2615,19 +2950,20 @@ follows: l A signed long value. L An unsigned long value. (This 'long' is _exactly_ 32 bits, which may differ from - what a local C compiler calls 'long'.) + what a local C compiler calls 'long'. If you want + native-length longs, use the '!' suffix.) - n A short in "network" (big-endian) order. - N A long in "network" (big-endian) order. - v A short in "VAX" (little-endian) order. - V A long in "VAX" (little-endian) order. 
+ n An unsigned short in "network" (big-endian) order. + N An unsigned long in "network" (big-endian) order. + v An unsigned short in "VAX" (little-endian) order. + V An unsigned long in "VAX" (little-endian) order. (These 'shorts' and 'longs' are _exactly_ 16 bits and _exactly_ 32 bits, respectively.) q A signed quad (64-bit) value. Q An unsigned quad value. - (Available only if your system supports 64-bit integer values - _and_ if Perl has been compiled to support those. + (Quads are available only if your system supports 64-bit + integer values _and_ if Perl has been compiled to support those. Causes a fatal error otherwise.) f A single-precision float in the native format. @@ -2637,6 +2973,8 @@ follows: P A pointer to a structure (fixed-length string). u A uuencoded string. + U A Unicode character number. Encodes to UTF-8 internally. + Works even if C<use utf8> is not in effect. w A BER compressed integer. Its bytes represent an unsigned integer in base 128, most significant digit first, with as @@ -2654,61 +2992,175 @@ The following rules apply: =item * Each letter may optionally be followed by a number giving a repeat -count. With all types except C<"a">, C<"A">, C<"Z">, C<"b">, C<"B">, C<"h">, -C<"H">, and C<"P"> the pack function will gobble up that many values from +count. With all types except C<a>, C<A>, C<Z>, C<b>, C<B>, C<h>, +C<H>, and C<P> the pack function will gobble up that many values from the LIST. A C<*> for the repeat count means to use however many items are -left. +left, except for C<@>, C<x>, C<X>, where it is equivalent +to C<0>, and C<u>, where it is equivalent to 1 (or 45, what is the +same). + +When used with C<Z>, C<*> results in the addition of a trailing null +byte (so the packed result will be one longer than the byte C<length> +of the item). + +The repeat count for C<u> is interpreted as the maximal number of bytes +to encode per line of output, with 0 and 1 replaced by 45. 
=item * -The C<"a">, C<"A">, and C<"Z"> types gobble just one value, but pack it as a +The C<a>, C<A>, and C<Z> types gobble just one value, but pack it as a string of length count, padding with nulls or spaces as necessary. When -unpacking, C<"A"> strips trailing spaces and nulls, C<"Z"> strips everything -after the first null, and C<"a"> returns data verbatim. +unpacking, C<A> strips trailing spaces and nulls, C<Z> strips everything +after the first null, and C<a> returns data verbatim. When packing, +C<a>, and C<Z> are equivalent. + +If the value-to-pack is too long, it is truncated. If too long and an +explicit count is provided, C<Z> packs only C<$count-1> bytes, followed +by a null byte. Thus C<Z> always packs a trailing null byte under +all circumstances. =item * -Likewise, the C<"b"> and C<"B"> fields pack a string that many bits long. +Likewise, the C<b> and C<B> fields pack a string that many bits long. +Each byte of the input field of pack() generates 1 bit of the result. +Each result bit is based on the least-significant bit of the corresponding +input byte, i.e., on C<ord($byte)%2>. In particular, bytes C<"0"> and +C<"1"> generate bits 0 and 1, as do bytes C<"\0"> and C<"\1">. + +Starting from the beginning of the input string of pack(), each 8-tuple +of bytes is converted to 1 byte of output. With format C<b> +the first byte of the 8-tuple determines the least-significant bit of a +byte, and with format C<B> it determines the most-significant bit of +a byte. + +If the length of the input string is not exactly divisible by 8, the +remainder is packed as if the input string were padded by null bytes +at the end. Similarly, during unpack()ing the "extra" bits are ignored. + +If the input string of pack() is longer than needed, extra bytes are ignored. +A C<*> for the repeat count of pack() means to use all the bytes of +the input field. On unpack()ing the bits are converted to a string +of C<"0">s and C<"1">s. 
=item * -The C<"h"> and C<"H"> fields pack a string that many nybbles long. +The C<h> and C<H> fields pack a string that many nybbles (4-bit groups, +representable as hexadecimal digits, 0-9a-f) long. + +Each byte of the input field of pack() generates 4 bits of the result. +For non-alphabetical bytes the result is based on the 4 least-significant +bits of the input byte, i.e., on C<ord($byte)%16>. In particular, +bytes C<"0"> and C<"1"> generate nybbles 0 and 1, as do bytes +C<"\0"> and C<"\1">. For bytes C<"a".."f"> and C<"A".."F"> the result +is compatible with the usual hexadecimal digits, so that C<"a"> and +C<"A"> both generate the nybble C<0xa==10>. The result for bytes +C<"g".."z"> and C<"G".."Z"> is not well-defined. + +Starting from the beginning of the input string of pack(), each pair +of bytes is converted to 1 byte of output. With format C<h> the +first byte of the pair determines the least-significant nybble of the +output byte, and with format C<H> it determines the most-significant +nybble. + +If the length of the input string is not even, it behaves as if padded +by a null byte at the end. Similarly, during unpack()ing the "extra" +nybbles are ignored. + +If the input string of pack() is longer than needed, extra bytes are ignored. +A C<*> for the repeat count of pack() means to use all the bytes of +the input field. On unpack()ing the bits are converted to a string +of hexadecimal digits. =item * -The C<"p"> type packs a pointer to a null-terminated string. You are +The C<p> type packs a pointer to a null-terminated string. You are responsible for ensuring the string is not a temporary value (which can potentially get deallocated before you get around to using the packed result). -The C<"P"> type packs a pointer to a structure of the size indicated by the -length. A NULL pointer is created if the corresponding value for C<"p"> or -C<"P"> is C<undef>. +The C<P> type packs a pointer to a structure of the size indicated by the +length. 
A NULL pointer is created if the corresponding value for C<p> or +C<P> is C<undef>, similarly for unpack(). + +=item * + +The C</> template character allows packing and unpacking of strings where +the packed structure contains a byte count followed by the string itself. +You write I<length-item>C</>I<string-item>. + +The I<length-item> can be any C<pack> template letter, +and describes how the length value is packed. +The ones likely to be of most use are integer-packing ones like +C<n> (for Java strings), C<w> (for ASN.1 or SNMP) +and C<N> (for Sun XDR). + +The I<string-item> must, at present, be C<"A*">, C<"a*"> or C<"Z*">. +For C<unpack> the length of the string is obtained from the I<length-item>, +but if you put in the '*' it will be ignored. + + unpack 'C/a', "\04Gurusamy"; gives 'Guru' + unpack 'a3/A* A*', '007 Bond J '; gives (' Bond','J') + pack 'n/a* w/a*','hello,','world'; gives "\000\006hello,\005world" + +The I<length-item> is not returned explicitly from C<unpack>. + +Adding a count to the I<length-item> letter is unlikely to do anything +useful, unless that letter is C<A>, C<a> or C<Z>. Packing with a +I<length-item> of C<a> or C<Z> may introduce C<"\000"> characters, +which Perl does not regard as legal in numeric strings. + +=item * + +The integer types C<s>, C<S>, C<l>, and C<L> may be +immediately followed by a C<!> suffix to signify native shorts or +longs--as you can see from above for example a bare C<l> does mean +exactly 32 bits, the native C<long> (as seen by the local C compiler) +may be larger. This is an issue mainly in 64-bit platforms. You can +see whether using C<!> makes any difference by + + print length(pack("s")), " ", length(pack("s!")), "\n"; + print length(pack("l")), " ", length(pack("l!")), "\n"; + +C<i!> and C<I!> also work but only because of completeness; +they are identical to C<i> and C<I>. 
+ +The actual sizes (in bytes) of native shorts, ints, longs, and long +longs on the platform where Perl was built are also available via +L<Config>: + + use Config; + print $Config{shortsize}, "\n"; + print $Config{intsize}, "\n"; + print $Config{longsize}, "\n"; + print $Config{longlongsize}, "\n"; + +(The C<$Config{longlongsize}> will be undefined if your system does +not support long longs.) =item * -The integer formats C<"s">, C<"S">, C<"i">, C<"I">, C<"l">, and C<"L"> +The integer formats C<s>, C<S>, C<i>, C<I>, C<l>, and C<L> are inherently non-portable between processors and operating systems because they obey the native byteorder and endianness. For example a -4-byte integer 0x87654321 (2271560481 decimal) be ordered natively +4-byte integer 0x12345678 (305419896 decimal) would be ordered natively (arranged in and handled by the CPU registers) into bytes as - + 0x12 0x34 0x56 0x78 # big-endian 0x78 0x56 0x34 0x12 # little-endian - -Basically, the Intel, Alpha, and VAX CPUs and little-endian, while + +Basically, the Intel, Alpha, and VAX CPUs are little-endian, while everybody else, for example Motorola m68k/88k, PPC, Sparc, HP PA, Power, and Cray are big-endian. MIPS can be either: Digital used it -in little-endian mode, SGI uses it in big-endian mode. +in little-endian mode; SGI uses it in big-endian mode. -The names `big-endian' and `little-endian' are joking references to +The names `big-endian' and `little-endian' are comic references to the classic "Gulliver's Travels" (via the paper "On Holy Wars and a Plea for Peace" by Danny Cohen, USC/ISI IEN 137, April 1, 1980) and -the egg-eating habits of the lilliputs. - -Some systems may even have weird byte orders such as - +the egg-eating habits of the Lilliputians. 
+ +Some systems may have even weirder byte orders such as + 0x56 0x78 0x12 0x34 0x34 0x12 0x78 0x56 - + You can see your system's preference with print join(" ", map { sprintf "%#02x", $_ } @@ -2723,8 +3175,9 @@ via L<Config>: Byteorders C<'1234'> and C<'12345678'> are little-endian, C<'4321'> and C<'87654321'> are big-endian. -If you want portable packed integers use the formats C<"n">, C<"N">, -C<"v">, and C<"V">, their byte endianness and size is known. +If you want portable packed integers use the formats C<n>, C<N>, +C<v>, and C<V>, their byte endianness and size is known. +See also L<perlport>. =item * @@ -2734,12 +3187,30 @@ standard "network" representation, no facility for interchange has been made. This means that packed floating point data written on one machine may not be readable on another - even if both use IEEE floating point arithmetic (as the endian-ness of the memory representation is not part -of the IEEE spec). +of the IEEE spec). See also L<perlport>. Note that Perl uses doubles internally for all numeric calculation, and converting from double into float and thence back to double again will lose precision (i.e., C<unpack("f", pack("f", $foo))> will not in general -equal C<$foo>). +equal $foo). + +=item * + +You must yourself do any alignment or padding by inserting for example +enough C<'x'>es while packing. There is no way that pack() and unpack() +could know where the bytes are going to or coming from. Therefore +C<pack> (and C<unpack>) handle their output and input as flat +sequences of bytes. + +=item * + +A comment in a TEMPLATE starts with C<#> and goes to the end of line. + +=item * + +If TEMPLATE requires more arguments to pack() than actually given, pack() +assumes additional C<""> arguments. If TEMPLATE requires fewer arguments +to pack() than actually given, extra arguments are ignored. 
=back @@ -2749,10 +3220,17 @@ Examples: # foo eq "ABCD" $foo = pack("C4",65,66,67,68); # same thing + $foo = pack("U4",0x24b6,0x24b7,0x24b8,0x24b9); + # same thing with Unicode circled letters $foo = pack("ccxxcc",65,66,67,68); # foo eq "AB\0\0CD" + # note: the above examples featuring "C" and "c" are true + # only on ASCII and ASCII-derived systems such as ISO Latin 1 + # and UTF-8. In EBCDIC the first example would be + # $foo = pack("CCCC",193,194,195,196); + $foo = pack("s2",1,2); # "\1\0\2\0" on little-endian # "\0\1\0\2" on big-endian @@ -2780,6 +3258,12 @@ Examples: unpack("N", pack("B32", substr("0" x 32 . shift, -32))); } + $foo = pack('sx2l', 12, 34); + # short 12, two zero bytes padding, long 34 + $bar = pack('s@4l', 12, 34); + # short 12, zero fill to position 4, long 34 + # $foo eq $bar + The same template may generally also be used in unpack(). =item package @@ -2788,11 +3272,11 @@ The same template may generally also be used in unpack(). Declares the compilation unit as being in the given namespace. The scope of the package declaration is from the declaration itself through the end -of the enclosing block, file, or eval (the same as the C<my()> operator). +of the enclosing block, file, or eval (the same as the C<my> operator). All further unqualified dynamic identifiers will be in this namespace. A package statement affects only dynamic variables--including those -you've used C<local()> on--but I<not> lexical variables, which are created -with C<my()>. Typically it would be the first declaration in a file to +you've used C<local> on--but I<not> lexical variables, which are created +with C<my>. Typically it would be the first declaration in a file to be included by the C<require> or C<use> operator. You can switch into a package in more than one place; it merely influences which symbol table is used by the compiler for the rest of that block. You can refer to @@ -2829,13 +3313,14 @@ See L<perlvar/$^F>. 
=item pop Pops and returns the last value of the array, shortening the array by -one element. Has a similar effect to +one element. Has an effect similar to - $tmp = $ARRAY[$#ARRAY--]; + $ARRAY[$#ARRAY--] -If there are no elements in the array, returns the undefined value. -If ARRAY is omitted, pops the C<@ARGV> array in the main program, and -the C<@_> array in subroutines, just like C<shift()>. +If there are no elements in the array, returns the undefined value +(although this may happen at other times as well). If ARRAY is +omitted, pops the C<@ARGV> array in the main program, and the C<@_> +array in subroutines, just like C<shift>. =item pos SCALAR @@ -2853,22 +3338,26 @@ L<perlop>. =item print -Prints a string or a comma-separated list of strings. Returns TRUE -if successful. FILEHANDLE may be a scalar variable name, in which case -the variable contains the name of or a reference to the filehandle, thus -introducing one level of indirection. (NOTE: If FILEHANDLE is a variable -and the next token is a term, it may be misinterpreted as an operator +Prints a string or a list of strings. Returns true if successful. +FILEHANDLE may be a scalar variable name, in which case the variable +contains the name of or a reference to the filehandle, thus introducing +one level of indirection. (NOTE: If FILEHANDLE is a variable and +the next token is a term, it may be misinterpreted as an operator unless you interpose a C<+> or put parentheses around the arguments.) -If FILEHANDLE is omitted, prints by default to standard output (or to the -last selected output channel--see L</select>). If LIST is also omitted, -prints C<$_> to the currently selected output channel. To set the default -output channel to something other than STDOUT use the select operation. -Note that, because print takes a LIST, anything in the LIST is evaluated -in list context, and any subroutine that you call will have one or -more of its expressions evaluated in list context. 
Also be careful -not to follow the print keyword with a left parenthesis unless you want -the corresponding right parenthesis to terminate the arguments to the -print--interpose a C<+> or put parentheses around all the arguments. +If FILEHANDLE is omitted, prints by default to standard output (or +to the last selected output channel--see L</select>). If LIST is +also omitted, prints C<$_> to the currently selected output channel. +To set the default output channel to something other than STDOUT +use the select operation. The current value of C<$,> (if any) is +printed between each LIST item. The current value of C<$\> (if +any) is printed after the entire LIST has been printed. Because +print takes a LIST, anything in the LIST is evaluated in list +context, and any subroutine that you call will have one or more of +its expressions evaluated in list context. Also be careful not to +follow the print keyword with a left parenthesis unless you want +the corresponding right parenthesis to terminate the arguments to +the print--interpose a C<+> or put parentheses around all the +arguments. Note that if you're storing FILEHANDLES in an array or other expression, you will have to use a block returning its value instead: @@ -2882,12 +3371,12 @@ you will have to use a block returning its value instead: Equivalent to C<print FILEHANDLE sprintf(FORMAT, LIST)>, except that C<$\> (the output record separator) is not appended. The first argument -of the list will be interpreted as the C<printf()> format. If C<use locale> is +of the list will be interpreted as the C<printf> format. If C<use locale> is in effect, the character used for the decimal point in formatted real numbers is affected by the LC_NUMERIC locale. See L<perllocale>. -Don't fall into the trap of using a C<printf()> when a simple -C<print()> would do. The C<print()> is more efficient and less +Don't fall into the trap of using a C<printf> when a simple +C<print> would do. 
The C<print> is more efficient and less error prone. =item prototype FUNCTION @@ -2899,7 +3388,7 @@ the function whose prototype you want to retrieve. If FUNCTION is a string starting with C<CORE::>, the rest is taken as a name for Perl builtin. If the builtin is not I<overridable> (such as C<qw//>) or its arguments cannot be expressed by a prototype (such as -C<system()>) returns C<undef> because the builtin does not really behave +C<system>) returns C<undef> because the builtin does not really behave like a Perl function. Otherwise, the string describing the equivalent prototype is returned. @@ -2946,8 +3435,8 @@ If EXPR is omitted, uses C<$_>. Returns a random fractional number greater than or equal to C<0> and less than the value of EXPR. (EXPR should be positive.) If EXPR is -omitted, the value C<1> is used. Automatically calls C<srand()> unless -C<srand()> has already been called. See also C<srand()>. +omitted, the value C<1> is used. Automatically calls C<srand> unless +C<srand> has already been called. See also C<srand>. (Note: If your rand function consistently returns numbers that are too large or too small, then your version of Perl was probably compiled @@ -2963,18 +3452,18 @@ C<0> at end of file, or undef if there was an error. SCALAR will be grown or shrunk to the length actually read. An OFFSET may be specified to place the read data at some other place than the beginning of the string. This call is actually implemented in terms of stdio's fread(3) -call. To get a true read(2) system call, see C<sysread()>. +call. To get a true read(2) system call, see C<sysread>. =item readdir DIRHANDLE -Returns the next directory entry for a directory opened by C<opendir()>. +Returns the next directory entry for a directory opened by C<opendir>. If used in list context, returns all the rest of the entries in the directory. If there are no more entries, returns an undefined value in scalar context or a null list in list context. 
-If you're planning to filetest the return values out of a C<readdir()>, you'd +If you're planning to filetest the return values out of a C<readdir>, you'd better prepend the directory in question. Otherwise, because we didn't -C<chdir()> there, it would have been testing the wrong file. +C<chdir> there, it would have been testing the wrong file. opendir(DIR, $some_dir) || die "can't opendir $some_dir: $!"; @dots = grep { /^\./ && -f "$some_dir/$_" } readdir(DIR); @@ -2993,8 +3482,8 @@ When C<$/> is set to C<undef>, when readline() is in scalar context (i.e. file slurp mode), and when an empty file is read, it returns C<''> the first time, followed by C<undef> subsequently. -This is the internal function implementing the C<E<lt>EXPRE<gt>> -operator, but you can use it directly. The C<E<lt>EXPRE<gt>> +This is the internal function implementing the C<< <EXPR> >> +operator, but you can use it directly. The C<< <EXPR> >> operator is discussed in more detail in L<perlop/"I/O Operators">. $line = <STDIN>; @@ -3023,12 +3512,13 @@ operator is discussed in more detail in L<perlop/"I/O Operators">. =item recv SOCKET,SCALAR,LENGTH,FLAGS Receives a message on a socket. Attempts to receive LENGTH bytes of -data into variable SCALAR from the specified SOCKET filehandle. -Actually does a C C<recvfrom()>, so that it can return the address of the -sender. Returns the undefined value if there's an error. SCALAR will -be grown or shrunk to the length actually read. Takes the same flags -as the system call of the same name. -See L<perlipc/"UDP: Message Passing"> for examples. +data into variable SCALAR from the specified SOCKET filehandle. SCALAR +will be grown or shrunk to the length actually read. Takes the same +flags as the system call of the same name. Returns the address of the +sender if SOCKET's protocol supports this; returns an empty string +otherwise. If there's an error, returns the undefined value. This call +is actually implemented in terms of recvfrom(2) system call. 
See +L<perlipc/"UDP: Message Passing"> for examples. =item redo LABEL @@ -3061,6 +3551,10 @@ C<redo> cannot be used to retry a block which returns a value such as C<eval {}>, C<sub {}> or C<do {}>, and should not be used to exit a grep() or map() operation. +Note that a block by itself is semantically identical to a loop +that executes once. Thus C<redo> inside such a block will effectively +turn it into a looping construct. + See also L</continue> for an illustration of how C<last>, C<next>, and C<redo> work. @@ -3068,20 +3562,21 @@ C<redo> work. =item ref -Returns a TRUE value if EXPR is a reference, FALSE otherwise. If EXPR +Returns a true value if EXPR is a reference, false otherwise. If EXPR is not specified, C<$_> will be used. The value returned depends on the type of thing the reference is a reference to. Builtin types include: - REF SCALAR ARRAY HASH CODE + REF GLOB + LVALUE If the referenced object has been blessed into a package, then that package -name is returned instead. You can think of C<ref()> as a C<typeof()> operator. +name is returned instead. You can think of C<ref> as a C<typeof> operator. if (ref($r) eq "HASH") { print "r is a reference to a hash.\n"; @@ -3097,7 +3592,9 @@ See also L<perlref>. =item rename OLDNAME,NEWNAME -Changes the name of a file. Returns C<1> for success, C<0> otherwise. +Changes the name of a file; an existing file NEWNAME will be +clobbered. Returns true for success, false otherwise. + Behavior of this function varies wildly depending on your system implementation. For example, it will usually not work across file system boundaries, even though the system I<mv> command sometimes compensates @@ -3105,17 +3602,29 @@ for this. Other restrictions include whether it works on directories, open files, or pre-existing files. Check L<perlport> and either the rename(2) manpage or equivalent system documentation for details. 
+=item require VERSION + =item require EXPR =item require Demands some semantics specified by EXPR, or by C<$_> if EXPR is not -supplied. If EXPR is numeric, demands that the current version of Perl -(C<$]> or $PERL_VERSION) be equal or greater than EXPR. +supplied. + +If a VERSION is specified as a literal of the form v5.6.1, +demands that the current version of Perl (C<$^V> or $PERL_VERSION) be +at least as recent as that version, at run time. (For compatibility +with older versions of Perl, a numeric argument will also be interpreted +as VERSION.) Compare with L</use>, which can do a similar check at +compile time. + + require v5.6.1; # run time version check + require 5.6.1; # ditto + require 5.005_03; # float version allowed for compatibility Otherwise, demands that a library file be included if it hasn't already been included. The file is included via the do-FILE mechanism, which is -essentially just a variety of C<eval()>. Has semantics similar to the following +essentially just a variety of C<eval>. Has semantics similar to the following subroutine: sub require { @@ -3126,23 +3635,24 @@ subroutine: foreach $prefix (@INC) { $realfilename = "$prefix/$filename"; if (-f $realfilename) { + $INC{$filename} = $realfilename; $result = do $realfilename; last ITER; } } die "Can't find $filename in \@INC"; } + delete $INC{$filename} if $@ || !$result; die $@ if $@; die "$filename did not return true value" unless $result; - $INC{$filename} = $realfilename; return $result; } Note that the file will not be included twice under the same specified -name. The file must return TRUE as the last statement to indicate +name. The file must return true as the last statement to indicate successful execution of any initialization code, so it's customary to -end such a file with "C<1;>" unless you're sure it'll return TRUE -otherwise. But it's better just to put the "C<1;>", in case you add more +end such a file with C<1;> unless you're sure it'll return true +otherwise. 
But it's better just to put the C<1;>, in case you add more statements. If EXPR is a bareword, the require assumes a "F<.pm>" extension and @@ -3165,7 +3675,7 @@ But if you try this: require "Foo::Bar"; # not a bareword because of the "" The require function will look for the "F<Foo::Bar>" file in the @INC array and -will complain about not finding "F<Foo::Bar>" there. In this case you can do: +will complain about not finding "F<Foo::Bar>" there. In this case you can do: eval "require $class"; @@ -3198,10 +3708,10 @@ See L</my>. =item return -Returns from a subroutine, C<eval()>, or C<do FILE> with the value +Returns from a subroutine, C<eval>, or C<do FILE> with the value given in EXPR. Evaluation of EXPR may be in list, scalar, or void context, depending on how the return value will be used, and the context -may vary from one execution to the next (see C<wantarray()>). If no EXPR +may vary from one execution to the next (see C<wantarray>). If no EXPR is given, returns an empty list in list context, the undefined value in scalar context, and (of course) nothing at all in a void context. @@ -3232,7 +3742,7 @@ on a large hash, such as from a DBM file. =item rewinddir DIRHANDLE Sets the current position to the beginning of the directory for the -C<readdir()> routine on DIRHANDLE. +C<readdir> routine on DIRHANDLE. =item rindex STR,SUBSTR,POSITION @@ -3247,7 +3757,7 @@ last occurrence at or before that position. =item rmdir Deletes the directory specified by FILENAME if that directory is empty. If it -succeeds it returns TRUE, otherwise it returns FALSE and sets C<$!> (errno). If +succeeds it returns true, otherwise it returns false and sets C<$!> (errno). If FILENAME is omitted, uses C<$_>. =item s/// @@ -3267,7 +3777,7 @@ needed. If you really wanted to do so, however, you could use the construction C<@{[ (some expression) ]}>, but usually a simple C<(some expression)> suffices. 
-Since C<scalar> is a unary operator, if you accidentally use for EXPR a +Because C<scalar> is a unary operator, if you accidentally use for EXPR a parenthesized list, this behaves as a scalar comma expression, evaluating all but the last element in void context and returning the final element evaluated in scalar context. This is seldom what you want. @@ -3285,17 +3795,18 @@ See L<perlop> for more details on unary operators and the comma operator. =item seek FILEHANDLE,POSITION,WHENCE -Sets FILEHANDLE's position, just like the C<fseek()> call of C<stdio()>. +Sets FILEHANDLE's position, just like the C<fseek> call of C<stdio>. FILEHANDLE may be an expression whose value gives the name of the filehandle. The values for WHENCE are C<0> to set the new position to -POSITION, C<1> to set it to the current position plus POSITION, and C<2> to -set it to EOF plus POSITION (typically negative). For WHENCE you may -use the constants C<SEEK_SET>, C<SEEK_CUR>, and C<SEEK_END> from either the -C<IO::Seekable> or the POSIX module. Returns C<1> upon success, C<0> otherwise. +POSITION, C<1> to set it to the current position plus POSITION, and +C<2> to set it to EOF plus POSITION (typically negative). For WHENCE +you may use the constants C<SEEK_SET>, C<SEEK_CUR>, and C<SEEK_END> +(start of the file, current position, end of the file) from the Fcntl +module. Returns C<1> upon success, C<0> otherwise. -If you want to position file for C<sysread()> or C<syswrite()>, don't use -C<seek()> -- buffering makes its effect on the file's system position -unpredictable and non-portable. Use C<sysseek()> instead. +If you want to position file for C<sysread> or C<syswrite>, don't use +C<seek>--buffering makes its effect on the file's system position +unpredictable and non-portable. Use C<sysseek> instead. Due to the rules and rigors of ANSI C, on some systems you have to do a seek whenever you switch between reading and writing.
Amongst other @@ -3306,9 +3817,9 @@ A WHENCE of C<1> (C<SEEK_CUR>) is useful for not moving the file position: This is also useful for applications emulating C<tail -f>. Once you hit EOF on your read, and then sleep for a while, you might have to stick in a -seek() to reset things. The C<seek()> doesn't change the current position, +seek() to reset things. The C<seek> doesn't change the current position, but it I<does> clear the end-of-file condition on the handle, so that the -next C<E<lt>FILEE<gt>> makes Perl try again to read something. We hope. +next C<< <FILE> >> makes Perl try again to read something. We hope. If that doesn't work (some stdios are particularly cantankerous), then you may need something more like this: @@ -3324,8 +3835,8 @@ you may need something more like this: =item seekdir DIRHANDLE,POS -Sets the current position for the C<readdir()> routine on DIRHANDLE. POS -must be a value returned by C<telldir()>. Has the same caveats about +Sets the current position for the C<readdir> routine on DIRHANDLE. POS +must be a value returned by C<telldir>. Has the same caveats about possible directory compaction as the corresponding system library routine. @@ -3335,7 +3846,7 @@ routine. Returns the currently selected filehandle. Sets the current default filehandle for output, if FILEHANDLE is supplied. This has two -effects: first, a C<write()> or a C<print()> without a filehandle will +effects: first, a C<write> or a C<print> without a filehandle will default to this FILEHANDLE. Second, references to variables related to output will refer to this output channel. 
For example, if you have to set the top of form format for more than one output channel, you might @@ -3360,7 +3871,7 @@ methods, preferring to write the last example as: =item select RBITS,WBITS,EBITS,TIMEOUT This calls the select(2) system call with the bit masks specified, which -can be constructed using C<fileno()> and C<vec()>, along these lines: +can be constructed using C<fileno> and C<vec>, along these lines: $rin = $win = $ein = ''; vec($rin,fileno(STDIN),1) = 1; @@ -3389,33 +3900,35 @@ or to block until something becomes ready just do this $nfound = select($rout=$rin, $wout=$win, $eout=$ein, undef); -Most systems do not bother to return anything useful in C<$timeleft>, so -calling select() in scalar context just returns C<$nfound>. +Most systems do not bother to return anything useful in $timeleft, so +calling select() in scalar context just returns $nfound. Any of the bit masks can also be undef. The timeout, if specified, is in seconds, which may be fractional. Note: not all implementations are -capable of returning theC<$timeleft>. If not, they always return -C<$timeleft> equal to the supplied C<$timeout>. +capable of returning the $timeleft. If not, they always return +$timeleft equal to the supplied $timeout. You can effect a sleep of 250 milliseconds this way: select(undef, undef, undef, 0.25); -B<WARNING>: One should not attempt to mix buffered I/O (like C<read()> -or E<lt>FHE<gt>) with C<select()>, except as permitted by POSIX, and even -then only on POSIX systems. You have to use C<sysread()> instead. +B<WARNING>: One should not attempt to mix buffered I/O (like C<read> +or <FH>) with C<select>, except as permitted by POSIX, and even +then only on POSIX systems. You have to use C<sysread> instead. =item semctl ID,SEMNUM,CMD,ARG -Calls the System V IPC function C<semctl()>. You'll probably have to say +Calls the System V IPC function C<semctl>. You'll probably have to say use IPC::SysV; first to get the correct constant definitions.
If CMD is IPC_STAT or GETALL, then ARG must be a variable which will hold the returned -semid_ds structure or semaphore value array. Returns like C<ioctl()>: the -undefined value for error, "C<0> but true" for zero, or the actual return -value otherwise. See also C<IPC::SysV> and C<IPC::Semaphore> documentation. +semid_ds structure or semaphore value array. Returns like C<ioctl>: +the undefined value for error, "C<0 but true>" for zero, or the actual +return value otherwise. The ARG must consist of a vector of native +short integers, which may be created with C<pack("s!",(0)x$nsem)>. +See also C<IPC::SysV> and C<IPC::Semaphore> documentation. =item semget KEY,NSEMS,FLAGS @@ -3429,9 +3942,9 @@ Calls the System V IPC function semop to perform semaphore operations such as signaling and waiting. OPSTRING must be a packed array of semop structures. Each semop structure can be generated with C<pack("sss", $semnum, $semop, $semflag)>. The number of semaphore -operations is implied by the length of OPSTRING. Returns TRUE if -successful, or FALSE if there is an error. As an example, the -following code waits on semaphore C<$semnum> of semaphore id C<$semid>: +operations is implied by the length of OPSTRING. Returns true if +successful, or false if there is an error. As an example, the +following code waits on semaphore $semnum of semaphore id $semid: $semop = pack("sss", $semnum, -1, 0); die "Semaphore trouble: $!\n" unless semop($semid, $semop); @@ -3445,7 +3958,7 @@ and C<IPC::SysV::Semaphore> documentation. Sends a message on a socket. Takes the same flags as the system call of the same name. On unconnected sockets you must specify a -destination to send TO, in which case it does a C C<sendto()>. Returns +destination to send TO, in which case it does a C C<sendto>. Returns the number of characters sent, or the undefined value if there is an error. The C system call sendmsg(2) is currently unimplemented. See L<perlipc/"UDP: Message Passing"> for examples.
@@ -3454,9 +3967,10 @@ See L<perlipc/"UDP: Message Passing"> for examples. Sets the current process group for the specified PID, C<0> for the current process. Will produce a fatal error if used on a machine that doesn't -implement setpgrp(2). If the arguments are omitted, it defaults to -C<0,0>. Note that the POSIX version of C<setpgrp()> does not accept any -arguments, so only C<setpgrp(0,0)> is portable. See also C<POSIX::setsid()>. +implement POSIX setpgid(2) or BSD setpgrp(2). If the arguments are omitted, +it defaults to C<0,0>. Note that the BSD 4.2 version of C<setpgrp> does not +accept any arguments, so only C<setpgrp(0,0)> is portable. See also +C<POSIX::setsid()>. =item setpriority WHICH,WHO,PRIORITY @@ -3479,9 +3993,11 @@ array by 1 and moving everything down. If there are no elements in the array, returns the undefined value. If ARRAY is omitted, shifts the C<@_> array within the lexical scope of subroutines and formats, and the C<@ARGV> array at file scopes or within the lexical scopes established by -the C<eval ''>, C<BEGIN {}>, C<END {}>, and C<INIT {}> constructs. -See also C<unshift()>, C<push()>, and C<pop()>. C<Shift()> and C<unshift()> do the -same thing to the left end of an array that C<pop()> and C<push()> do to the +the C<eval ''>, C<BEGIN {}>, C<INIT {}>, C<CHECK {}>, and C<END {}> +constructs. + +See also C<unshift>, C<push>, and C<pop>. C<Shift()> and C<unshift> do the +same thing to the left end of an array that C<pop> and C<push> do to the right end. =item shmctl ID,CMD,ARG @@ -3511,9 +4027,9 @@ position POS for size SIZE by attaching to it, copying in/out, and detaching from it. When reading, VAR must be a variable that will hold the data read. When writing, if STRING is too long, only SIZE bytes are used; if STRING is too short, nulls are written to fill out -SIZE bytes. Return TRUE if successful, or FALSE if there is an error. -See also C<IPC::SysV> documentation and the C<IPC::Shareable> module -from CPAN. +SIZE bytes. 
Return true if successful, or false if there is an error. +shmread() taints the variable. See also C<IPC::SysV> documentation and +the C<IPC::Shareable> module from CPAN. =item shutdown SOCKET,HOW @@ -3527,7 +4043,7 @@ has the same interpretation as in the system call of the same name. This is useful with sockets when you want to tell the other side you're done writing but not done reading, or vice versa. It's also a more insistent form of close because it also -disables the filedescriptor in any forked copies in other +disables the file descriptor in any forked copies in other processes. =item sin EXPR @@ -3537,7 +4053,7 @@ processes. Returns the sine of EXPR (expressed in radians). If EXPR is omitted, returns sine of C<$_>. -For the inverse sine operation, you may use the C<POSIX::asin()> +For the inverse sine operation, you may use the C<Math::Trig::asin> function, or use this relation: sub asin { atan2($_[0], sqrt(1 - $_[0] * $_[0])) } @@ -3549,8 +4065,8 @@ function, or use this relation: Causes the script to sleep for EXPR seconds, or forever if no EXPR. May be interrupted if the process receives a signal such as C<SIGALRM>. Returns the number of seconds actually slept. You probably cannot -mix C<alarm()> and C<sleep()> calls, because C<sleep()> is often implemented -using C<alarm()>. +mix C<alarm> and C<sleep> calls, because C<sleep> is often implemented +using C<alarm>. On some older systems, it may sleep up to a full second less than what you requested, depending on how it counts seconds. Most modern systems @@ -3559,26 +4075,36 @@ however, because your process might not be scheduled right away in a busy multitasking system. For delays of finer granularity than one second, you may use Perl's -C<syscall()> interface to access setitimer(2) if your system supports it, -or else see L</select> above. +C<syscall> interface to access setitimer(2) if your system supports +it, or else see L</select> above. The Time::HiRes module from CPAN +may also help. 
-See also the POSIX module's C<sigpause()> function. +See also the POSIX module's C<sigpause> function. =item socket SOCKET,DOMAIN,TYPE,PROTOCOL Opens a socket of the specified kind and attaches it to filehandle -SOCKET. DOMAIN, TYPE, and PROTOCOL are specified the same as for the -system call of the same name. You should "C<use Socket;>" first to get -the proper definitions imported. See the examples in L<perlipc/"Sockets: Client/Server Communication">. +SOCKET. DOMAIN, TYPE, and PROTOCOL are specified the same as for +the system call of the same name. You should C<use Socket> first +to get the proper definitions imported. See the examples in +L<perlipc/"Sockets: Client/Server Communication">. + +On systems that support a close-on-exec flag on files, the flag will +be set for the newly opened file descriptor, as determined by the +value of $^F. See L<perlvar/$^F>. =item socketpair SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL Creates an unnamed pair of sockets in the specified domain, of the specified type. DOMAIN, TYPE, and PROTOCOL are specified the same as for the system call of the same name. If unimplemented, yields a fatal -error. Returns TRUE if successful. +error. Returns true if successful. -Some systems defined C<pipe()> in terms of C<socketpair()>, in which a call +On systems that support a close-on-exec flag on files, the flag will +be set for the newly opened file descriptors, as determined by the value +of $^F. See L<perlvar/$^F>. + +Some systems defined C<pipe> in terms of C<socketpair>, in which a call to C<pipe(Rdr, Wtr)> is essentially: use Socket; @@ -3595,25 +4121,29 @@ See L<perlipc> for an example of socketpair use. =item sort LIST Sorts the LIST and returns the sorted list value. If SUBNAME or BLOCK -is omitted, C<sort()>s in standard string comparison order. If SUBNAME is +is omitted, C<sort>s in standard string comparison order. 
If SUBNAME is specified, it gives the name of a subroutine that returns an integer less than, equal to, or greater than C<0>, depending on how the elements -of the array are to be ordered. (The C<E<lt>=E<gt>> and C<cmp> +of the list are to be ordered. (The C<< <=> >> and C<cmp> operators are extremely useful in such routines.) SUBNAME may be a scalar variable name (unsubscripted), in which case the value provides the name of (or a reference to) the actual subroutine to use. In place of a SUBNAME, you can provide a BLOCK as an anonymous, in-line sort subroutine. -In the interests of efficiency the normal calling code for subroutines is -bypassed, with the following effects: the subroutine may not be a -recursive subroutine, and the two elements to be compared are passed into -the subroutine not via C<@_> but as the package global variables C<$a> and -C<$b> (see example below). They are passed by reference, so don't -modify C<$a> and C<$b>. And don't try to declare them as lexicals either. +If the subroutine's prototype is C<($$)>, the elements to be compared +are passed by reference in C<@_>, as for a normal subroutine. This is +slower than unprototyped subroutines, where the elements to be +compared are passed into the subroutine +as the package global variables $a and $b (see example below). Note that +in the latter case, it is usually counter-productive to declare $a and +$b as lexicals. + +In either case, the subroutine may not be recursive. The values to be +compared are always passed by reference, so don't modify them. You also cannot exit out of the sort block or subroutine using any of the -loop control operators described in L<perlsyn> or with C<goto()>. +loop control operators described in L<perlsyn> or with C<goto>. When C<use locale> is in effect, C<sort LIST> sorts LIST according to the current collation locale. See L<perllocale>. 
@@ -3638,19 +4168,19 @@ Examples: # sort numerically descending @articles = sort {$b <=> $a} @files; + # this sorts the %age hash by value instead of key + # using an in-line function + @eldest = sort { $age{$b} <=> $age{$a} } keys %age; + # sort using explicit subroutine name sub byage { $age{$a} <=> $age{$b}; # presuming numeric } @sortedclass = sort byage @class; - # this sorts the %age hash by value instead of key - # using an in-line function - @eldest = sort { $age{$b} <=> $age{$a} } keys %age; - - sub backwards { $b cmp $a; } - @harry = ('dog','cat','x','Cain','Abel'); - @george = ('gone','chased','yz','Punished','Axed'); + sub backwards { $b cmp $a } + @harry = qw(dog cat x Cain Abel); + @george = qw(gone chased yz Punished Axed); print sort @harry; # prints AbelCaincatdogx print sort backwards @harry; @@ -3684,15 +4214,23 @@ Examples: } 0..$#old ]; - # same thing using a Schwartzian Transform (no temps) + # same thing, but without any temps @new = map { $_->[0] } - sort { $b->[1] <=> $a->[1] - || - $a->[2] cmp $b->[2] - } map { [$_, /=(\d+)/, uc($_)] } @old; + sort { $b->[1] <=> $a->[1] + || + $a->[2] cmp $b->[2] + } map { [$_, /=(\d+)/, uc($_)] } @old; + + # using a prototype allows you to use any comparison subroutine + # as a sort subroutine (including other package's subroutines) + package other; + sub backwards ($$) { $_[1] cmp $_[0]; } # $a and $b are not set here -If you're using strict, you I<MUST NOT> declare C<$a> -and C<$b> as lexicals. They are package globals. That means + package main; + @new = sort other::backwards @old; + +If you're using strict, you I<must not> declare $a +and $b as lexicals. They are package globals. That means if you're in the C<main> package, it's @articles = sort {$main::b <=> $main::a} @files; @@ -3716,14 +4254,18 @@ well-defined. =item splice ARRAY,OFFSET +=item splice ARRAY + Removes the elements designated by OFFSET and LENGTH from an array, and replaces them with the elements of LIST, if any. 
In list context, returns the elements removed from the array. In scalar context, returns the last element removed, or C<undef> if no elements are removed. The array grows or shrinks as necessary. -If OFFSET is negative then it start that far from the end of the array. +If OFFSET is negative then it starts that far from the end of the array. If LENGTH is omitted, removes everything from OFFSET onward. -If LENGTH is negative, leave that many elements off the end of the array. +If LENGTH is negative, leaves that many elements off the end of the array. +If both OFFSET and LENGTH are omitted, removes everything. + The following equivalences hold (assuming C<$[ == 0>): push(@a,$x,$y) splice(@a,@a,0,$x,$y) @@ -3753,7 +4295,7 @@ Example, assuming array lengths are passed before arrays: =item split -Splits a string into an array of strings, and returns it. By default, +Splits a string into a list of strings and returns that list. By default, empty leading fields are preserved, and empty trailing ones are deleted. If not in list context, returns the number of fields found and splits into @@ -3770,7 +4312,7 @@ that the delimiter may be longer than one character.) If LIMIT is specified and positive, splits into no more than that many fields (though it may split into fewer). If LIMIT is unspecified or zero, trailing null fields are stripped (which potential users -of C<pop()> would do well to remember). If LIMIT is negative, it is +of C<pop> would do well to remember). If LIMIT is negative, it is treated as if an arbitrarily large LIMIT had been specified. A pattern matching the null string (not to be confused with @@ -3792,7 +4334,7 @@ unnecessary work. For the list above LIMIT would have been 4 by default. In time critical applications it behooves you not to split into more fields than you really need. 
-If the PATTERN contains parentheses, additional array elements are +If the PATTERN contains parentheses, additional list elements are created from each matching substring in the delimiter. split(/([,-])/, "1-10,20", 3); @@ -3801,7 +4343,7 @@ produces the list value (1, '-', 10, ',', 20) -If you had the entire header of a normal Unix email message in C<$header>, +If you had the entire header of a normal Unix email message in $header, you could split it up into fields and their values this way: $header =~ s/\n\s+/ /g; # fix continuation lines @@ -3812,11 +4354,11 @@ patterns that vary at runtime. (To do runtime compilation only once, use C</$variable/o>.) As a special case, specifying a PATTERN of space (C<' '>) will split on -white space just as C<split()> with no arguments does. Thus, C<split(' ')> can +white space just as C<split> with no arguments does. Thus, C<split(' ')> can be used to emulate B<awk>'s default behavior, whereas C<split(/ /)> will give you as many null initial fields as there are leading spaces. -A C<split()> on C</\s+/> is like a C<split(' ')> except that any leading -whitespace produces a null first field. A C<split()> with no arguments +A C<split> on C</\s+/> is like a C<split(' ')> except that any leading +whitespace produces a null first field. A C<split> with no arguments really does a C<split(' ', $_)> internally. Example: @@ -3828,22 +4370,22 @@ Example: #... } -(Note that C<$shell> above will still have a newline on it. See L</chop>, +(Note that $shell above will still have a newline on it. See L</chop>, L</chomp>, and L</join>.) =item sprintf FORMAT, LIST -Returns a string formatted by the usual C<printf()> conventions of the -C library function C<sprintf()>. See L<sprintf(3)> or L<printf(3)> +Returns a string formatted by the usual C<printf> conventions of the +C library function C<sprintf>. See L<sprintf(3)> or L<printf(3)> on your system for an explanation of the general principles. 
-Perl does its own C<sprintf()> formatting -- it emulates the C -function C<sprintf()>, but it doesn't use it (except for floating-point +Perl does its own C<sprintf> formatting--it emulates the C +function C<sprintf>, but it doesn't use it (except for floating-point numbers, and even then only the standard modifiers are allowed). As a -result, any non-standard extensions in your local C<sprintf()> are not +result, any non-standard extensions in your local C<sprintf> are not available from Perl. -Perl's C<sprintf()> permits the following universally-known conversions: +Perl's C<sprintf> permits the following universally-known conversions: %% a percent sign %c a character with the given number @@ -3861,6 +4403,7 @@ In addition, Perl permits the following widely-supported conversions: %X like %x, but using upper-case letters %E like %e, but using an upper-case "E" %G like %g, but with an upper-case "E" (if applicable) + %b an unsigned integer, in binary %p a pointer (outputs the Perl value's address in hexadecimal) %n special: *stores* the number of characters output so far into the next variable in the parameter list @@ -3888,21 +4431,71 @@ and the conversion letter: for integer l interpret integer as C type "long" or "unsigned long" h interpret integer as C type "short" or "unsigned short" + If no flags, interpret integer as C type "int" or "unsigned" -There is also one Perl-specific flag: +There are also two Perl-specific flags: V interpret integer as Perl's standard integer type + v interpret string as a vector of integers, output as + numbers separated either by dots, or by an arbitrary + string received from the argument list when the flag + is preceded by C<*> -Where a number would appear in the flags, an asterisk ("C<*>") may be +Where a number would appear in the flags, an asterisk (C<*>) may be used instead, in which case Perl uses the next item in the parameter list as the given number (that is, as the field width or precision). 
-If a field width obtained through "C<*>" is negative, it has the same -effect as the "C<->" flag: left-justification. +If a field width obtained through C<*> is negative, it has the same +effect as the C<-> flag: left-justification. + +The C<v> flag is useful for displaying ordinal values of characters +in arbitrary strings: + + printf "version is v%vd\n", $^V; # Perl's version + printf "address is %*vX\n", ":", $addr; # IPv6 address + printf "bits are %*vb\n", " ", $bits; # random bitstring If C<use locale> is in effect, the character used for the decimal point in formatted real numbers is affected by the LC_NUMERIC locale. See L<perllocale>. +If Perl understands "quads" (64-bit integers) (this requires +either that the platform natively support quads or that Perl +be specifically compiled to support quads), the characters + + d u o x X b i D U O + +print quads, and they may optionally be preceded by + + ll L q + +For example + + %lld %16LX %qo + +You can find out whether your Perl supports quads via L<Config>: + + use Config; + ($Config{use64bitint} eq 'define' || $Config{longsize} == 8) && + print "quads\n"; + +If Perl understands "long doubles" (this requires that the platform +support long doubles), the flags + + e f g E F G + +may optionally be preceded by + + ll L + +For example + + %llf %Lg + +You can find out whether your Perl supports long doubles via L<Config>: + + use Config; + $Config{d_longdbl} eq 'define' && print "long doubles\n"; + =item sqrt EXPR =item sqrt @@ -3918,19 +4511,19 @@ loaded the standard Math::Complex module. =item srand -Sets the random number seed for the C<rand()> operator. If EXPR is +Sets the random number seed for the C<rand> operator. If EXPR is omitted, uses a semi-random value supplied by the kernel (if it supports the F</dev/urandom> device) or based on the current time and process ID, among other things. In versions of Perl prior to 5.004 the default -seed was just the current C<time()>. 
This isn't a particularly good seed, +seed was just the current C<time>. This isn't a particularly good seed, so many old programs supply their own seed value (often C<time ^ $$> or -C<time ^ ($$ + ($$ E<lt>E<lt> 15))>), but that isn't necessary any more. +C<time ^ ($$ + ($$ << 15))>), but that isn't necessary any more. -In fact, it's usually not necessary to call C<srand()> at all, because if +In fact, it's usually not necessary to call C<srand> at all, because if it is not called explicitly, it is called implicitly at the first use of -the C<rand()> operator. However, this was not the case in version of Perl +the C<rand> operator. However, this was not the case in versions of Perl before 5.004, so if your script will run under older Perl versions, it -should call C<srand()>. +should call C<srand>. Note that you need something much more random than the default seed for cryptographic purposes. Checksumming the compressed output of one or more @@ -3942,11 +4535,11 @@ example: If you're particularly concerned with this, see the C<Math::TrulyRandom> module in CPAN. -Do I<not> call C<srand()> multiple times in your program unless you know +Do I<not> call C<srand> multiple times in your program unless you know exactly what you're doing and why you're doing it. The point of the -function is to "seed" the C<rand()> function so that C<rand()> can produce +function is to "seed" the C<rand> function so that C<rand> can produce a different sequence each time you run your program. Just do it once at the -top of your program, or you I<won't> get random numbers out of C<rand()>! +top of your program, or you I<won't> get random numbers out of C<rand>!
Frequently called programs (like CGI scripts) that simply use @@ -3984,9 +4577,9 @@ meaning of the fields: 5 gid numeric group ID of file's owner 6 rdev the device identifier (special files only) 7 size total size of file, in bytes - 8 atime last access time since the epoch - 9 mtime last modify time since the epoch - 10 ctime inode change time (NOT creation time!) since the epoch + 8 atime last access time in seconds since the epoch + 9 mtime last modify time in seconds since the epoch + 10 ctime inode change time (NOT creation time!) in seconds since the epoch 11 blksize preferred block size for file system I/O 12 blocks actual number of blocks allocated @@ -4000,7 +4593,8 @@ last stat or filetest are returned. Example: print "$file is executable NFS file\n"; } -(This works on machines only for which the device number is negative under NFS.) +(This works on machines only for which the device number is negative +under NFS.) Because the mode contains both the file type and its permissions, you should mask off the file type portion and (s)printf using a C<"%o"> @@ -4009,8 +4603,7 @@ if you want to see the real permissions. $mode = (stat($filename))[2]; printf "Permissions are %04o\n", $mode & 07777; - -In scalar context, C<stat()> returns a boolean value indicating success +In scalar context, C<stat> returns a boolean value indicating success or failure, and, if successful, sets the information associated with the special filehandle C<_>. 
@@ -4022,6 +4615,66 @@ The File::stat module provides a convenient, by-name access mechanism: $filename, $sb->size, $sb->mode & 07777, scalar localtime $sb->mtime; +You can import symbolic mode constants (C<S_IF*>) and functions +(C<S_IS*>) from the Fcntl module: + + use Fcntl ':mode'; + + $mode = (stat($filename))[2]; + + $user_rwx = ($mode & S_IRWXU) >> 6; + $group_read = ($mode & S_IRGRP) >> 3; + $other_execute = $mode & S_IXOTH; + + printf "Permissions are %04o\n", S_IMODE($mode), "\n"; + + $is_setuid = $mode & S_ISUID; + $is_directory = S_ISDIR($mode); + +You could write the last two using the C<-u> and C<-d> operators. +The commonly available S_IF* constants are + + # Permissions: read, write, execute, for user, group, others. + + S_IRWXU S_IRUSR S_IWUSR S_IXUSR + S_IRWXG S_IRGRP S_IWGRP S_IXGRP + S_IRWXO S_IROTH S_IWOTH S_IXOTH + + # Setuid/Setgid/Stickiness. + + S_ISUID S_ISGID S_ISVTX S_ISTXT + + # File types. Not necessarily all are available on your system. + + S_IFREG S_IFDIR S_IFLNK S_IFBLK S_IFCHR S_IFIFO S_IFSOCK S_IFWHT S_ENFMT + + # The following are compatibility aliases for S_IRUSR, S_IWUSR, S_IXUSR. + + S_IREAD S_IWRITE S_IEXEC + +and the S_IF* functions are + + S_IMODE($mode) the part of $mode containing the permission bits + and the setuid/setgid/sticky bits + + S_IFMT($mode) the part of $mode containing the file type + which can be bit-anded with e.g. S_IFREG + or with the following functions + + # The operators -f, -d, -l, -b, -c, -p, and -S. + + S_ISREG($mode) S_ISDIR($mode) S_ISLNK($mode) + S_ISBLK($mode) S_ISCHR($mode) S_ISFIFO($mode) S_ISSOCK($mode) + + # No direct -X operator counterpart, but for the first one + # the -g operator is often equivalent. The ENFMT stands for + # record flocking enforcement, a platform-dependent feature. + + S_ISENFMT($mode) S_ISWHT($mode) + +See your native chmod(2) and stat(2) documentation for more details +about the S_* constants. 
+ =item study SCALAR =item study @@ -4030,12 +4683,12 @@ Takes extra time to study SCALAR (C<$_> if unspecified) in anticipation of doing many pattern matches on the string before it is next modified. This may or may not save time, depending on the nature and number of patterns you are searching on, and on the distribution of character -frequencies in the string to be searched -- you probably want to compare +frequencies in the string to be searched--you probably want to compare run times with and without it to see which runs faster. Those loops which scan for many short constant strings (including the constant parts of more complex patterns) will benefit most. You may have only -one C<study()> active at a time -- if you study a different scalar the first -is "unstudied". (The way C<study()> works is this: a linked list of every +one C<study> active at a time--if you study a different scalar the first +is "unstudied". (The way C<study> works is this: a linked list of every character in the string to be searched is made, so we know, for example, where all the C<'k'> characters are. From each search string, the rarest character is selected, based on some static frequency tables @@ -4054,14 +4707,14 @@ before any line containing a certain pattern: print; } -In searching for C</\bfoo\b/>, only those locations in C<$_> that contain C<"f"> -will be looked at, because C<"f"> is rarer than C<"o">. In general, this is +In searching for C</\bfoo\b/>, only those locations in C<$_> that contain C<f> +will be looked at, because C<f> is rarer than C<o>. In general, this is a big win except in pathological cases. The only question is whether it saves you more time than it took to build the linked list in the first place. Note that if you have to look for strings that you don't know till -runtime, you can build an entire loop as a string and C<eval()> that to +runtime, you can build an entire loop as a string and C<eval> that to avoid recompiling all your patterns all the time. 
Together with undefining C<$/> to input entire files as one record, this can be very fast, often faster than specialized programs like fgrep(1). The following @@ -4088,33 +4741,42 @@ out the names of those files that contain a match: =item sub NAME BLOCK This is subroutine definition, not a real function I<per se>. With just a -NAME (and possibly prototypes), it's just a forward declaration. Without -a NAME, it's an anonymous function declaration, and does actually return a -value: the CODE ref of the closure you just created. See L<perlsub> and -L<perlref> for details. +NAME (and possibly prototypes or attributes), it's just a forward declaration. +Without a NAME, it's an anonymous function declaration, and does actually +return a value: the CODE ref of the closure you just created. See L<perlsub> +and L<perlref> for details. -=item substr EXPR,OFFSET,LEN,REPLACEMENT +=item substr EXPR,OFFSET,LENGTH,REPLACEMENT -=item substr EXPR,OFFSET,LEN +=item substr EXPR,OFFSET,LENGTH =item substr EXPR,OFFSET Extracts a substring out of EXPR and returns it. First character is at offset C<0>, or whatever you've set C<$[> to (but don't do that). If OFFSET is negative (or more precisely, less than C<$[>), starts -that far from the end of the string. If LEN is omitted, returns -everything to the end of the string. If LEN is negative, leaves that +that far from the end of the string. If LENGTH is omitted, returns +everything to the end of the string. If LENGTH is negative, leaves that many characters off the end of the string. -If you specify a substring that is partly outside the string, the part -within the string is returned. If the substring is totally outside -the string a warning is produced. - You can use the substr() function as an lvalue, in which case EXPR -must itself be an lvalue. If you assign something shorter than LEN, -the string will shrink, and if you assign something longer than LEN, +must itself be an lvalue. 
If you assign something shorter than LENGTH, +the string will shrink, and if you assign something longer than LENGTH, the string will grow to accommodate it. To keep the string the same -length you may need to pad or chop your value using C<sprintf()>. +length you may need to pad or chop your value using C<sprintf>. + +If OFFSET and LENGTH specify a substring that is partly outside the +string, only the part within the string is returned. If the substring +is beyond either end of the string, substr() returns the undefined +value and produces a warning. When used as an lvalue, specifying a +substring that is entirely outside the string is a fatal error. +Here's an example showing the behavior for boundary cases: + + my $name = 'fred'; + substr($name, 4) = 'dy'; # $name is now 'freddy' + my $null = substr $name, 6, 2; # returns '' (no warning) + my $oops = substr $name, 7; # returns undef, with warning + substr($name, 7) = 'gap'; # fatal error An alternative to using substr() as an lvalue is to specify the replacement string as the 4th argument. This allows you to replace @@ -4139,12 +4801,12 @@ as follows: if a given argument is numeric, the argument is passed as an int. If not, the pointer to the string value is passed. You are responsible to make sure a string is pre-extended long enough to receive any result that might be written into a string. You can't use a -string literal (or other read-only string) as an argument to C<syscall()> +string literal (or other read-only string) as an argument to C<syscall> because Perl has to assume that any string pointer might be written through. If your integer arguments are not literals and have never been interpreted in a numeric context, you may need to add C<0> to them to force them to look -like numbers. This emulates the C<syswrite()> function (or vice versa): +like numbers. 
This emulates the C<syswrite> function (or vice versa): require 'syscall.ph'; # may need to run h2ph $s = "hi there\n"; @@ -4154,7 +4816,7 @@ Note that Perl supports passing of up to only 14 arguments to your system call, which in practice should usually suffice. Syscall returns whatever value returned by the system call it calls. -If the system call fails, C<syscall()> returns C<-1> and sets C<$!> (errno). +If the system call fails, C<syscall> returns C<-1> and sets C<$!> (errno). Note that some system calls can legitimately return C<-1>. The proper way to handle such calls is to assign C<$!=0;> before the call and check the value of C<$!> if syscall returns C<-1>. @@ -4162,7 +4824,7 @@ check the value of C<$!> if syscall returns C<-1>. There's a problem with C<syscall(&SYS_pipe)>: it returns the file number of the read end of the pipe it creates. There is no way to retrieve the file number of the other end. You can avoid this -problem by using C<pipe()> instead. +problem by using C<pipe> instead. =item sysopen FILEHANDLE,FILENAME,MODE @@ -4171,29 +4833,50 @@ problem by using C<pipe()> instead. Opens the file whose filename is given by FILENAME, and associates it with FILEHANDLE. If FILEHANDLE is an expression, its value is used as the name of the real filehandle wanted. This function calls the -underlying operating system's C<open()> function with the parameters +underlying operating system's C<open> function with the parameters FILENAME, MODE, PERMS. The possible values and flag bits of the MODE parameter are system-dependent; they are available via the standard module C<Fcntl>. +See the documentation of your operating system's C<open> to see which +values and flag bits are available. You may combine several flags +using the C<|>-operator. + +Some of the most common values are C<O_RDONLY> for opening the file in +read-only mode, C<O_WRONLY> for opening the file in write-only mode, +and C<O_RDWR> for opening the file in read-write mode. 
+ For historical reasons, some values work on almost every system supported by perl: zero means read-only, one means write-only, and two means read/write. We know that these values do I<not> work under OS/390 & VM/ESA Unix and on the Macintosh; you probably don't want to use them in new code. -If the file named by FILENAME does not exist and the C<open()> call creates +If the file named by FILENAME does not exist and the C<open> call creates it (typically because MODE includes the C<O_CREAT> flag), then the value of PERMS specifies the permissions of the newly created file. If you omit -the PERMS argument to C<sysopen()>, Perl uses the octal value C<0666>. +the PERMS argument to C<sysopen>, Perl uses the octal value C<0666>. These permission values need to be in octal, and are modified by your process's current C<umask>. -You should seldom if ever use C<0644> as argument to C<sysopen()>, because +In many systems the C<O_EXCL> flag is available for opening files in +exclusive mode. This is B<not> locking: exclusiveness means here that +if the file already exists, sysopen() fails. The C<O_EXCL> wins +C<O_TRUNC>. + +Sometimes you may want to truncate an already-existing file: C<O_TRUNC>. + +You should seldom if ever use C<0644> as argument to C<sysopen>, because that takes away the user's option to have a more permissive umask. Better to omit it. See the perlfunc(1) entry on C<umask> for more on this. +Note that C<sysopen> depends on the fdopen() C library function. +On many UNIX systems, fdopen() is known to fail when file descriptors +exceed a certain value, typically 255. If you need more file +descriptors than that, consider rebuilding Perl to use the C<sfio> +library, or perhaps using the POSIX::open() function. + See L<perlopentut> for a kinder, gentler explanation of opening files. =item sysread FILEHANDLE,SCALAR,LENGTH,OFFSET @@ -4202,8 +4885,8 @@ See L<perlopentut> for a kinder, gentler explanation of opening files. 
Attempts to read LENGTH bytes of data into variable SCALAR from the specified FILEHANDLE, using the system call read(2). It bypasses stdio, -so mixing this with other kinds of reads, C<print()>, C<write()>, -C<seek()>, C<tell()>, or C<eof()> can cause confusion because stdio +so mixing this with other kinds of reads, C<print>, C<write>, +C<seek>, C<tell>, or C<eof> can cause confusion because stdio usually buffers data. Returns the number of bytes actually read, C<0> at end of file, or undef if there was an error. SCALAR will be grown or shrunk so that the last byte actually read is the last byte of the @@ -4223,47 +4906,56 @@ for a return value for 0 to decide whether you're done. =item sysseek FILEHANDLE,POSITION,WHENCE Sets FILEHANDLE's system position using the system call lseek(2). It -bypasses stdio, so mixing this with reads (other than C<sysread()>), -C<print()>, C<write()>, C<seek()>, C<tell()>, or C<eof()> may cause -confusion. FILEHANDLE may be an expression whose value gives the name -of the filehandle. The values for WHENCE are C<0> to set the new -position to POSITION, C<1> to set the it to the current position plus -POSITION, and C<2> to set it to EOF plus POSITION (typically negative). -For WHENCE, you may use the constants C<SEEK_SET>, C<SEEK_CUR>, and -C<SEEK_END> from either the C<IO::Seekable> or the POSIX module. +bypasses stdio, so mixing this with reads (other than C<sysread>), +C<print>, C<write>, C<seek>, C<tell>, or C<eof> may cause confusion. +FILEHANDLE may be an expression whose value gives the name of the +filehandle. The values for WHENCE are C<0> to set the new position to +POSITION, C<1> to set the it to the current position plus POSITION, +and C<2> to set it to EOF plus POSITION (typically negative). For +WHENCE, you may also use the constants C<SEEK_SET>, C<SEEK_CUR>, and +C<SEEK_END> (start of the file, current position, end of the file) +from the Fcntl module. Returns the new position, or the undefined value on failure. 
A position -of zero is returned as the string "C<0> but true"; thus C<sysseek()> returns -TRUE on success and FALSE on failure, yet you can still easily determine +of zero is returned as the string C<"0 but true">; thus C<sysseek> returns +true on success and false on failure, yet you can still easily determine the new position. =item system LIST =item system PROGRAM LIST -Does exactly the same thing as "C<exec LIST>", except that a fork is done -first, and the parent process waits for the child process to complete. -Note that argument processing varies depending on the number of -arguments. If there is more than one argument in LIST, or if LIST is -an array with more than one value, starts the program given by the -first element of the list with arguments given by the rest of the list. -If there is only one scalar argument, the argument is -checked for shell metacharacters, and if there are any, the entire -argument is passed to the system's command shell for parsing (this is -C</bin/sh -c> on Unix platforms, but varies on other platforms). If -there are no shell metacharacters in the argument, it is split into -words and passed directly to C<execvp()>, which is more efficient. +Does exactly the same thing as C<exec LIST>, except that a fork is +done first, and the parent process waits for the child process to +complete. Note that argument processing varies depending on the +number of arguments. If there is more than one argument in LIST, +or if LIST is an array with more than one value, starts the program +given by the first element of the list with arguments given by the +rest of the list. If there is only one scalar argument, the argument +is checked for shell metacharacters, and if there are any, the +entire argument is passed to the system's command shell for parsing +(this is C</bin/sh -c> on Unix platforms, but varies on other +platforms). 
If there are no shell metacharacters in the argument, +it is split into words and passed directly to C<execvp>, which is +more efficient. + +Beginning with v5.6.0, Perl will attempt to flush all files opened for +output before any operation that may do a fork, but this may not be +supported on some platforms (see L<perlport>). To be safe, you may need +to set C<$|> ($AUTOFLUSH in English) or call the C<autoflush()> method +of C<IO::Handle> on any open handles. The return value is the exit status of the program as -returned by the C<wait()> call. To get the actual exit value divide by -256. See also L</exec>. This is I<NOT> what you want to use to capture +returned by the C<wait> call. To get the actual exit value divide by +256. See also L</exec>. This is I<not> what you want to use to capture the output from a command, for that you should use merely backticks or -C<qx//>, as described in L<perlop/"`STRING`">. +C<qx//>, as described in L<perlop/"`STRING`">. Return value of -1 +indicates a failure to start the program (inspect $! for the reason). -Like C<exec()>, C<system()> allows you to lie to a program about its name if -you use the "C<system PROGRAM LIST>" syntax. Again, see L</exec>. +Like C<exec>, C<system> allows you to lie to a program about its name if +you use the C<system PROGRAM LIST> syntax. Again, see L</exec>. -Because C<system()> and backticks block C<SIGINT> and C<SIGQUIT>, killing the +Because C<system> and backticks block C<SIGINT> and C<SIGQUIT>, killing the program they're running doesn't actually interrupt your program. @args = ("command", "arg1", "arg2"); @@ -4288,14 +4980,14 @@ See L<perlop/"`STRING`"> and L</exec> for details. =item syswrite FILEHANDLE,SCALAR Attempts to write LENGTH bytes of data from variable SCALAR to the -specified FILEHANDLE, using the system call write(2). If LENGTH is -not specified, writes whole SCALAR. 
It bypasses -stdio, so mixing this with reads (other than C<sysread())>, C<print()>, -C<write()>, C<seek()>, C<tell()>, or C<eof()> may cause confusion -because stdio usually buffers data. Returns the number of bytes -actually written, or C<undef> if there was an error. If the LENGTH is -greater than the available data in the SCALAR after the OFFSET, only as -much data as is available will be written. +specified FILEHANDLE, using the system call write(2). If LENGTH +is not specified, writes whole SCALAR. It bypasses stdio, so mixing +this with reads (other than C<sysread>), C<print>, C<write>, +C<seek>, C<tell>, or C<eof> may cause confusion because stdio +usually buffers data. Returns the number of bytes actually written, +or C<undef> if there was an error. If the LENGTH is greater than +the available data in the SCALAR after the OFFSET, only as much +data as is available will be written. An OFFSET may be specified to write the data from some part of the string other than the beginning. A negative OFFSET specifies writing @@ -4310,12 +5002,12 @@ Returns the current position for FILEHANDLE. FILEHANDLE may be an expression whose value gives the name of the actual filehandle. If FILEHANDLE is omitted, assumes the file last read. -There is no C<systell()> function. Use C<sysseek(FH, 0, 1)> for that. +There is no C<systell> function. Use C<sysseek(FH, 0, 1)> for that. =item telldir DIRHANDLE -Returns the current position of the C<readdir()> routines on DIRHANDLE. -Value may be given to C<seekdir()> to access a particular location in a +Returns the current position of the C<readdir> routines on DIRHANDLE. +Value may be given to C<seekdir> to access a particular location in a directory. Has the same caveats about possible directory compaction as the corresponding system library routine. @@ -4324,16 +5016,16 @@ the corresponding system library routine. This function binds a variable to a package class that will provide the implementation for the variable. 
VARIABLE is the name of the variable to be enchanted. CLASSNAME is the name of a class implementing objects -of correct type. Any additional arguments are passed to the "C<new()>" +of correct type. Any additional arguments are passed to the C<new> method of the class (meaning C<TIESCALAR>, C<TIEHANDLE>, C<TIEARRAY>, or C<TIEHASH>). Typically these are arguments such as might be passed -to the C<dbm_open()> function of C. The object returned by the "C<new()>" -method is also returned by the C<tie()> function, which would be useful +to the C<dbm_open()> function of C. The object returned by the C<new> +method is also returned by the C<tie> function, which would be useful if you want to access other methods in CLASSNAME. -Note that functions such as C<keys()> and C<values()> may return huge lists +Note that functions such as C<keys> and C<values> may return huge lists when used on large objects, like DBM files. You may prefer to use the -C<each()> function to iterate over such. Example: +C<each> function to iterate over such. Example: # print out history file offsets use NDBM_File; @@ -4393,16 +5085,16 @@ A class implementing a scalar should have the following methods: Not all methods indicated above need be implemented. See L<perltie>, L<Tie::Hash>, L<Tie::Array>, L<Tie::Scalar>, and L<Tie::Handle>. -Unlike C<dbmopen()>, the C<tie()> function will not use or require a module +Unlike C<dbmopen>, the C<tie> function will not use or require a module for you--you need to do that explicitly yourself. See L<DB_File> -or the F<Config> module for interesting C<tie()> implementations. +or the F<Config> module for interesting C<tie> implementations. For further details see L<perltie>, L<"tied VARIABLE">. =item tied VARIABLE Returns a reference to the object underlying VARIABLE (the same value -that was originally returned by the C<tie()> call that bound the variable +that was originally returned by the C<tie> call that bound the variable to a package.) 
Returns the undefined value if VARIABLE isn't tied to a package. @@ -4411,7 +5103,12 @@ package. Returns the number of non-leap seconds since whatever time the system considers to be the epoch (that's 00:00:00, January 1, 1904 for MacOS, and 00:00:00 UTC, January 1, 1970 for most other systems). -Suitable for feeding to C<gmtime()> and C<localtime()>. +Suitable for feeding to C<gmtime> and C<localtime>. + +For measuring time in better granularity than one second, +you may use either the Time::HiRes module from CPAN, or +if you have gettimeofday(2), you may be able to use the +C<syscall> interface of Perl, see L<perlfaq8> for details. =item times @@ -4422,7 +5119,7 @@ seconds, for this process and the children of this process. =item tr/// -The transliteration operator. Same as C<y///>. See L<perlop>. +The transliteration operator. Same as C<y///>. See L<perlop>. =item truncate FILEHANDLE,LENGTH @@ -4430,7 +5127,7 @@ The transliteration operator. Same as C<y///>. See L<perlop>. Truncates the file opened on FILEHANDLE, or named by EXPR, to the specified length. Produces a fatal error if truncate isn't implemented -on your system. Returns TRUE if successful, the undefined value +on your system. Returns true if successful, the undefined value otherwise. =item uc EXPR @@ -4440,7 +5137,8 @@ otherwise. Returns an uppercased version of EXPR. This is the internal function implementing the C<\U> escape in double-quoted strings. Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale>. -(It does not attempt to do titlecase mapping on initial letters. See C<ucfirst()> for that.) +Under Unicode (C<use utf8>) it uses the standard Unicode uppercase mappings. (It +does not attempt to do titlecase mapping on initial letters. See C<ucfirst> for that.) If EXPR is omitted, uses C<$_>. @@ -4448,9 +5146,11 @@ If EXPR is omitted, uses C<$_>. =item ucfirst -Returns the value of EXPR with the first character in uppercase. 
This is +Returns the value of EXPR with the first character +in uppercase (titlecase in Unicode). This is the internal function implementing the C<\u> escape in double-quoted strings. -Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale>. +Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale> +and L<utf8>. If EXPR is omitted, uses C<$_>. @@ -4470,12 +5170,12 @@ even if you tell C<sysopen> to create a file with permissions C<0777>, if your umask is C<0022> then the file will actually be created with permissions C<0755>. If your C<umask> were C<0027> (group can't write; others can't read, write, or execute), then passing -C<sysopen()> C<0666> would create a file with mode C<0640> (C<0666 &~ +C<sysopen> C<0666> would create a file with mode C<0640> (C<0666 &~ 027> is C<0640>). Here's some advice: supply a creation mode of C<0666> for regular -files (in C<sysopen()>) and one of C<0777> for directories (in -C<mkdir()>) and executable files. This gives users the freedom of +files (in C<sysopen>) and one of C<0777> for directories (in +C<mkdir>) and executable files. This gives users the freedom of choice: if they want protected files, they might choose process umasks of C<022>, C<027>, or even the particularly antisocial mask of C<077>. Programs should rarely if ever make policy decisions better left to @@ -4496,8 +5196,8 @@ string of octal digits. See also L</oct>, if all you have is a string. =item undef Undefines the value of EXPR, which must be an lvalue. Use only on a -scalar value, an array (using "C<@>"), a hash (using "C<%>"), a subroutine -(using "C<&>"), or a typeglob (using "<*>"). (Saying C<undef $hash{$key}> +scalar value, an array (using C<@>), a hash (using C<%>), a subroutine +(using C<&>), or a typeglob (using C<*>). (Saying C<undef $hash{$key}> will probably not do what you expect on most predefined variables or DBM list values, so don't do that; see L<delete>.) Always returns the undefined value. 
You can omit the EXPR, in which case nothing is @@ -4528,19 +5228,25 @@ deleted. unlink @goners; unlink <*.bak>; -Note: C<unlink()> will not delete directories unless you are superuser and +Note: C<unlink> will not delete directories unless you are superuser and the B<-U> flag is supplied to Perl. Even if these conditions are met, be warned that unlinking a directory can inflict damage on your -filesystem. Use C<rmdir()> instead. +filesystem. Use C<rmdir> instead. If LIST is omitted, uses C<$_>. =item unpack TEMPLATE,EXPR -C<Unpack()> does the reverse of C<pack()>: it takes a string representing a -structure and expands it out into a list value, returning the array -value. (In scalar context, it returns merely the first value -produced.) The TEMPLATE has the same format as in the C<pack()> function. +C<unpack> does the reverse of C<pack>: it takes a string +and expands it out into a list of values. +(In scalar context, it returns merely the first value produced.) + +The string is broken into chunks described by the TEMPLATE. Each chunk +is converted separately to a value. Typically, either the string is a result +of C<pack>, or the bytes of the string represent a C structure of some +kind. + +The TEMPLATE has the same format as in the C<pack> function. Here's a subroutine that does substring: sub substr { @@ -4552,44 +5258,60 @@ and then there's sub ordinal { unpack("c",$_[0]); } # same as ord() -In addition, you may prefix a field with a %E<lt>numberE<gt> to indicate that -you want a E<lt>numberE<gt>-bit checksum of the items instead of the items -themselves. Default is a 16-bit checksum. For example, the following +In addition to fields allowed in pack(), you may prefix a field with +a %<number> to indicate that +you want a <number>-bit checksum of the items instead of the items +themselves. Default is a 16-bit checksum. 
Checksum is calculated by +summing numeric values of expanded values (for string fields the sum of +C<ord($char)> is taken, for bit fields the sum of zeroes and ones). + +For example, the following computes the same number as the System V sum program: - while (<>) { - $checksum += unpack("%32C*", $_); - } - $checksum %= 65535; + $checksum = do { + local $/; # slurp! + unpack("%32C*",<>) % 65535; + }; The following efficiently counts the number of set bits in a bit vector: $setbits = unpack("%32b*", $selectmask); -See L</pack> for more examples. +The C<p> and C<P> formats should be used with care. Since Perl +has no way of checking whether the value passed to C<unpack()> +corresponds to a valid memory location, passing a pointer value that's +not known to be valid is likely to have disastrous consequences. + +If the repeat count of a field is larger than what the remainder of +the input string allows, repeat count is decreased. If the input string +is longer than one described by the TEMPLATE, the rest is ignored. + +See L</pack> for more examples and notes. =item untie VARIABLE -Breaks the binding between a variable and a package. (See C<tie()>.) +Breaks the binding between a variable and a package. (See C<tie>.) =item unshift ARRAY,LIST -Does the opposite of a C<shift()>. Or the opposite of a C<push()>, +Does the opposite of a C<shift>. Or the opposite of a C<push>, depending on how you look at it. Prepends list to the front of the array, and returns the new number of elements in the array. unshift(ARGV, '-e') unless $ARGV[0] =~ /^-/; Note the LIST is prepended whole, not one element at a time, so the -prepended elements stay in the same order. Use C<reverse()> to do the +prepended elements stay in the same order. Use C<reverse> to do the reverse. 
+=item use Module VERSION LIST + +=item use Module VERSION + =item use Module LIST =item use Module -=item use Module VERSION LIST - =item use VERSION Imports some semantics into the current package from the named module, @@ -4600,24 +5322,32 @@ package. It is exactly equivalent to except that Module I<must> be a bareword. -If the first argument to C<use> is a number, it is treated as a version -number instead of a module name. If the version of the Perl interpreter -is less than VERSION, then an error message is printed and Perl exits -immediately. This is often useful if you need to check the current -Perl version before C<use>ing library modules that have changed in -incompatible ways from older versions of Perl. (We try not to do -this more than we have to.) +VERSION, which can be specified as a literal of the form v5.6.1, demands +that the current version of Perl (C<$^V> or $PERL_VERSION) be at least +as recent as that version. (For compatibility with older versions of Perl, +a numeric literal will also be interpreted as VERSION.) If the version +of the running Perl interpreter is less than VERSION, then an error +message is printed and Perl exits immediately without attempting to +parse the rest of the file. Compare with L</require>, which can do a +similar check at run time. -The C<BEGIN> forces the C<require> and C<import()> to happen at compile time. The + use v5.6.1; # compile time version check + use 5.6.1; # ditto + use 5.005_03; # float version allowed for compatibility + +This is often useful if you need to check the current Perl version before +C<use>ing library modules that have changed in incompatible ways from +older versions of Perl. (We try not to do this more than we have to.) + +The C<BEGIN> forces the C<require> and C<import> to happen at compile time. The C<require> makes sure the module is loaded into memory if it hasn't been -yet. 
The C<import()> is not a builtin--it's just an ordinary static method -call into the "C<Module>" package to tell the module to import the list of +yet. The C<import> is not a builtin--it's just an ordinary static method +call into the C<Module> package to tell the module to import the list of features back into the current package. The module can implement its -C<import()> method any way it likes, though most modules just choose to -derive their C<import()> method via inheritance from the C<Exporter> class that -is defined in the C<Exporter> module. See L<Exporter>. If no C<import()> -method can be found then the error is currently silently ignored. This -may change to a fatal error in a future version. +C<import> method any way it likes, though most modules just choose to +derive their C<import> method via inheritance from the C<Exporter> class that +is defined in the C<Exporter> module. See L<Exporter>. If no C<import> +method can be found then the call is skipped. If you don't want your namespace altered, explicitly supply an empty list: @@ -4630,31 +5360,36 @@ That is exactly equivalent to If the VERSION argument is present between Module and LIST, then the C<use> will call the VERSION method in class Module with the given version as an argument. The default VERSION method, inherited from -the Universal class, croaks if the given version is larger than the -value of the variable C<$Module::VERSION>. (Note that there is not a -comma after VERSION!) +the UNIVERSAL class, croaks if the given version is larger than the +value of the variable C<$Module::VERSION>. + +Again, there is a distinction between omitting LIST (C<import> called +with no arguments) and an explicit empty LIST C<()> (C<import> not +called). Note that there is no comma after VERSION! Because this is a wide-open interface, pragmas (compiler directives) are also implemented this way. 
Currently implemented pragmas are: use integer; use diagnostics; - use sigtrap qw(SEGV BUS); - use strict qw(subs vars refs); - use subs qw(afunc blurfl); + use sigtrap qw(SEGV BUS); + use strict qw(subs vars refs); + use subs qw(afunc blurfl); + use warnings qw(all); -Some of these these pseudo-modules import semantics into the current +Some of these pseudo-modules import semantics into the current block scope (like C<strict> or C<integer>, unlike ordinary modules, which import symbols into the current package (which are effective through the end of the file)). -There's a corresponding "C<no>" command that unimports meanings imported -by C<use>, i.e., it calls C<unimport Module LIST> instead of C<import()>. +There's a corresponding C<no> command that unimports meanings imported +by C<use>, i.e., it calls C<unimport Module LIST> instead of C<import>. no integer; no strict 'refs'; + no warnings; -If no C<unimport()> method can be found the call fails with a fatal error. +If no C<unimport> method can be found the call fails with a fatal error. See L<perlmod> for a list of standard modules and pragmas. @@ -4663,8 +5398,8 @@ See L<perlmod> for a list of standard modules and pragmas. Changes the access and modification times on each file of a list of files. The first two elements of the list must be the NUMERICAL access and modification times, in that order. Returns the number of files -successfully changed. The inode modification time of each file is set -to the current time. This code has the same effect as the "C<touch>" +successfully changed. The inode change time of each file is set +to the current time. This code has the same effect as the C<touch> command if the files already exist: #!/usr/bin/perl @@ -4677,7 +5412,7 @@ Returns a list consisting of all the values of the named hash. (In a scalar context, returns the number of values.) The values are returned in an apparently random order.
The actual random order is subject to change in future versions of perl, but it is guaranteed to -be the same order as either the C<keys()> or C<each()> function would +be the same order as either the C<keys> or C<each> function would produce on the same (unmodified) hash. Note that you cannot modify the values of a hash this way, because the @@ -4688,29 +5423,55 @@ since it's lvaluable in a way that values() is not. for (@hash{keys %hash}) { s/foo/bar/g } # ok As a side effect, calling values() resets the HASH's internal iterator. -See also C<keys()>, C<each()>, and C<sort()>. +See also C<keys>, C<each>, and C<sort>. =item vec EXPR,OFFSET,BITS -Treats the string in EXPR as a vector of unsigned integers, and -returns the value of the bit field specified by OFFSET. BITS specifies -the number of bits that are reserved for each entry in the bit -vector. This must be a power of two from 1 to 32. C<vec()> may also be -assigned to, in which case parentheses are needed to give the expression -the correct precedence as in +Treats the string in EXPR as a bit vector made up of elements of +width BITS, and returns the value of the element specified by OFFSET +as an unsigned integer. BITS therefore specifies the number of bits +that are reserved for each element in the bit vector. This must +be a power of two from 1 to 32 (or 64, if your platform supports +that). + +If BITS is 8, "elements" coincide with bytes of the input string. + +If BITS is 16 or more, bytes of the input string are grouped into chunks +of size BITS/8, and each group is converted to a number as with +pack()/unpack() with big-endian formats C<n>/C<N> (and analogously +for BITS==64). See L<"pack"> for details. + +If BITS is 4 or less, the string is broken into bytes, then the bits +of each byte are broken into 8/BITS groups. Bits of a byte are +numbered in a little-endian-ish way, as in C<0x01>, C<0x02>, +C<0x04>, C<0x08>, C<0x10>, C<0x20>, C<0x40>, C<0x80>.
For example, +breaking the single input byte C<chr(0x36)> into two groups gives a list +C<(0x6, 0x3)>; breaking it into 4 groups gives C<(0x2, 0x1, 0x3, 0x0)>. + +C<vec> may also be assigned to, in which case parentheses are needed +to give the expression the correct precedence as in vec($image, $max_x * $x + $y, 8) = 3; -Vectors created with C<vec()> can also be manipulated with the logical -operators C<|>, C<&>, and C<^>, which will assume a bit vector operation is -desired when both operands are strings. See L<perlop/"Bitwise String Operators">. +If the selected element is off the end of the string, the value 0 is +returned. If an element off the end of the string is written to, +Perl will first extend the string with sufficiently many zero bytes. + +Strings created with C<vec> can also be manipulated with the logical +operators C<|>, C<&>, C<^>, and C<~>. These operators will assume a bit +vector operation is desired when both operands are strings. +See L<perlop/"Bitwise String Operators">. The following code will build up an ASCII string saying C<'PerlPerlPerl'>. -The comments show the string after each step. Note that this code works +The comments show the string after each step. Note that this code works in the same way on big-endian or little-endian machines. my $foo = ''; vec($foo, 0, 32) = 0x5065726C; # 'Perl' + + # $foo eq "Perl" eq "\x50\x65\x72\x6C", 32 bits + print vec($foo, 0, 8); # prints 80 == 0x50 == ord('P') + vec($foo, 2, 16) = 0x5065; # 'PerlPe' vec($foo, 3, 16) = 0x726C; # 'PerlPerl' vec($foo, 8, 8) = 0x50; # 'PerlPerlP' @@ -4723,18 +5484,183 @@ in the same way on big-endian or little-endian machines. 
vec($foo, 94, 1) = 1; # 'PerlPerlPerl' # 'l' is "\x6c" -To transform a bit vector into a string or array of 0's and 1's, use these: +To transform a bit vector into a string or list of 0's and 1's, use these: $bits = unpack("b*", $vector); @bits = split(//, unpack("b*", $vector)); If you know the exact length in bits, it can be used in place of the C<*>. +Here is an example to illustrate how the bits actually fall in place: + + #!/usr/bin/perl -wl + + print <<'EOT'; + 0 1 2 3 + unpack("V",$_) 01234567890123456789012345678901 + ------------------------------------------------------------------ + EOT + + for $w (0..3) { + $width = 2**$w; + for ($shift=0; $shift < $width; ++$shift) { + for ($off=0; $off < 32/$width; ++$off) { + $str = pack("B*", "0"x32); + $bits = (1<<$shift); + vec($str, $off, $width) = $bits; + $res = unpack("b*",$str); + $val = unpack("V", $str); + write; + } + } + } + + format STDOUT = + vec($_,@#,@#) = @<< == @######### @>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + $off, $width, $bits, $val, $res + . 
+ __END__ + +Regardless of the machine architecture on which it is run, the above +example should print the following table: + + 0 1 2 3 + unpack("V",$_) 01234567890123456789012345678901 + ------------------------------------------------------------------ + vec($_, 0, 1) = 1 == 1 10000000000000000000000000000000 + vec($_, 1, 1) = 1 == 2 01000000000000000000000000000000 + vec($_, 2, 1) = 1 == 4 00100000000000000000000000000000 + vec($_, 3, 1) = 1 == 8 00010000000000000000000000000000 + vec($_, 4, 1) = 1 == 16 00001000000000000000000000000000 + vec($_, 5, 1) = 1 == 32 00000100000000000000000000000000 + vec($_, 6, 1) = 1 == 64 00000010000000000000000000000000 + vec($_, 7, 1) = 1 == 128 00000001000000000000000000000000 + vec($_, 8, 1) = 1 == 256 00000000100000000000000000000000 + vec($_, 9, 1) = 1 == 512 00000000010000000000000000000000 + vec($_,10, 1) = 1 == 1024 00000000001000000000000000000000 + vec($_,11, 1) = 1 == 2048 00000000000100000000000000000000 + vec($_,12, 1) = 1 == 4096 00000000000010000000000000000000 + vec($_,13, 1) = 1 == 8192 00000000000001000000000000000000 + vec($_,14, 1) = 1 == 16384 00000000000000100000000000000000 + vec($_,15, 1) = 1 == 32768 00000000000000010000000000000000 + vec($_,16, 1) = 1 == 65536 00000000000000001000000000000000 + vec($_,17, 1) = 1 == 131072 00000000000000000100000000000000 + vec($_,18, 1) = 1 == 262144 00000000000000000010000000000000 + vec($_,19, 1) = 1 == 524288 00000000000000000001000000000000 + vec($_,20, 1) = 1 == 1048576 00000000000000000000100000000000 + vec($_,21, 1) = 1 == 2097152 00000000000000000000010000000000 + vec($_,22, 1) = 1 == 4194304 00000000000000000000001000000000 + vec($_,23, 1) = 1 == 8388608 00000000000000000000000100000000 + vec($_,24, 1) = 1 == 16777216 00000000000000000000000010000000 + vec($_,25, 1) = 1 == 33554432 00000000000000000000000001000000 + vec($_,26, 1) = 1 == 67108864 00000000000000000000000000100000 + vec($_,27, 1) = 1 == 134217728 00000000000000000000000000010000 + vec($_,28, 1) = 
1 == 268435456 00000000000000000000000000001000 + vec($_,29, 1) = 1 == 536870912 00000000000000000000000000000100 + vec($_,30, 1) = 1 == 1073741824 00000000000000000000000000000010 + vec($_,31, 1) = 1 == 2147483648 00000000000000000000000000000001 + vec($_, 0, 2) = 1 == 1 10000000000000000000000000000000 + vec($_, 1, 2) = 1 == 4 00100000000000000000000000000000 + vec($_, 2, 2) = 1 == 16 00001000000000000000000000000000 + vec($_, 3, 2) = 1 == 64 00000010000000000000000000000000 + vec($_, 4, 2) = 1 == 256 00000000100000000000000000000000 + vec($_, 5, 2) = 1 == 1024 00000000001000000000000000000000 + vec($_, 6, 2) = 1 == 4096 00000000000010000000000000000000 + vec($_, 7, 2) = 1 == 16384 00000000000000100000000000000000 + vec($_, 8, 2) = 1 == 65536 00000000000000001000000000000000 + vec($_, 9, 2) = 1 == 262144 00000000000000000010000000000000 + vec($_,10, 2) = 1 == 1048576 00000000000000000000100000000000 + vec($_,11, 2) = 1 == 4194304 00000000000000000000001000000000 + vec($_,12, 2) = 1 == 16777216 00000000000000000000000010000000 + vec($_,13, 2) = 1 == 67108864 00000000000000000000000000100000 + vec($_,14, 2) = 1 == 268435456 00000000000000000000000000001000 + vec($_,15, 2) = 1 == 1073741824 00000000000000000000000000000010 + vec($_, 0, 2) = 2 == 2 01000000000000000000000000000000 + vec($_, 1, 2) = 2 == 8 00010000000000000000000000000000 + vec($_, 2, 2) = 2 == 32 00000100000000000000000000000000 + vec($_, 3, 2) = 2 == 128 00000001000000000000000000000000 + vec($_, 4, 2) = 2 == 512 00000000010000000000000000000000 + vec($_, 5, 2) = 2 == 2048 00000000000100000000000000000000 + vec($_, 6, 2) = 2 == 8192 00000000000001000000000000000000 + vec($_, 7, 2) = 2 == 32768 00000000000000010000000000000000 + vec($_, 8, 2) = 2 == 131072 00000000000000000100000000000000 + vec($_, 9, 2) = 2 == 524288 00000000000000000001000000000000 + vec($_,10, 2) = 2 == 2097152 00000000000000000000010000000000 + vec($_,11, 2) = 2 == 8388608 00000000000000000000000100000000 + vec($_,12, 2) = 2 == 
33554432 00000000000000000000000001000000 + vec($_,13, 2) = 2 == 134217728 00000000000000000000000000010000 + vec($_,14, 2) = 2 == 536870912 00000000000000000000000000000100 + vec($_,15, 2) = 2 == 2147483648 00000000000000000000000000000001 + vec($_, 0, 4) = 1 == 1 10000000000000000000000000000000 + vec($_, 1, 4) = 1 == 16 00001000000000000000000000000000 + vec($_, 2, 4) = 1 == 256 00000000100000000000000000000000 + vec($_, 3, 4) = 1 == 4096 00000000000010000000000000000000 + vec($_, 4, 4) = 1 == 65536 00000000000000001000000000000000 + vec($_, 5, 4) = 1 == 1048576 00000000000000000000100000000000 + vec($_, 6, 4) = 1 == 16777216 00000000000000000000000010000000 + vec($_, 7, 4) = 1 == 268435456 00000000000000000000000000001000 + vec($_, 0, 4) = 2 == 2 01000000000000000000000000000000 + vec($_, 1, 4) = 2 == 32 00000100000000000000000000000000 + vec($_, 2, 4) = 2 == 512 00000000010000000000000000000000 + vec($_, 3, 4) = 2 == 8192 00000000000001000000000000000000 + vec($_, 4, 4) = 2 == 131072 00000000000000000100000000000000 + vec($_, 5, 4) = 2 == 2097152 00000000000000000000010000000000 + vec($_, 6, 4) = 2 == 33554432 00000000000000000000000001000000 + vec($_, 7, 4) = 2 == 536870912 00000000000000000000000000000100 + vec($_, 0, 4) = 4 == 4 00100000000000000000000000000000 + vec($_, 1, 4) = 4 == 64 00000010000000000000000000000000 + vec($_, 2, 4) = 4 == 1024 00000000001000000000000000000000 + vec($_, 3, 4) = 4 == 16384 00000000000000100000000000000000 + vec($_, 4, 4) = 4 == 262144 00000000000000000010000000000000 + vec($_, 5, 4) = 4 == 4194304 00000000000000000000001000000000 + vec($_, 6, 4) = 4 == 67108864 00000000000000000000000000100000 + vec($_, 7, 4) = 4 == 1073741824 00000000000000000000000000000010 + vec($_, 0, 4) = 8 == 8 00010000000000000000000000000000 + vec($_, 1, 4) = 8 == 128 00000001000000000000000000000000 + vec($_, 2, 4) = 8 == 2048 00000000000100000000000000000000 + vec($_, 3, 4) = 8 == 32768 00000000000000010000000000000000 + vec($_, 4, 4) = 8 == 
524288 00000000000000000001000000000000 + vec($_, 5, 4) = 8 == 8388608 00000000000000000000000100000000 + vec($_, 6, 4) = 8 == 134217728 00000000000000000000000000010000 + vec($_, 7, 4) = 8 == 2147483648 00000000000000000000000000000001 + vec($_, 0, 8) = 1 == 1 10000000000000000000000000000000 + vec($_, 1, 8) = 1 == 256 00000000100000000000000000000000 + vec($_, 2, 8) = 1 == 65536 00000000000000001000000000000000 + vec($_, 3, 8) = 1 == 16777216 00000000000000000000000010000000 + vec($_, 0, 8) = 2 == 2 01000000000000000000000000000000 + vec($_, 1, 8) = 2 == 512 00000000010000000000000000000000 + vec($_, 2, 8) = 2 == 131072 00000000000000000100000000000000 + vec($_, 3, 8) = 2 == 33554432 00000000000000000000000001000000 + vec($_, 0, 8) = 4 == 4 00100000000000000000000000000000 + vec($_, 1, 8) = 4 == 1024 00000000001000000000000000000000 + vec($_, 2, 8) = 4 == 262144 00000000000000000010000000000000 + vec($_, 3, 8) = 4 == 67108864 00000000000000000000000000100000 + vec($_, 0, 8) = 8 == 8 00010000000000000000000000000000 + vec($_, 1, 8) = 8 == 2048 00000000000100000000000000000000 + vec($_, 2, 8) = 8 == 524288 00000000000000000001000000000000 + vec($_, 3, 8) = 8 == 134217728 00000000000000000000000000010000 + vec($_, 0, 8) = 16 == 16 00001000000000000000000000000000 + vec($_, 1, 8) = 16 == 4096 00000000000010000000000000000000 + vec($_, 2, 8) = 16 == 1048576 00000000000000000000100000000000 + vec($_, 3, 8) = 16 == 268435456 00000000000000000000000000001000 + vec($_, 0, 8) = 32 == 32 00000100000000000000000000000000 + vec($_, 1, 8) = 32 == 8192 00000000000001000000000000000000 + vec($_, 2, 8) = 32 == 2097152 00000000000000000000010000000000 + vec($_, 3, 8) = 32 == 536870912 00000000000000000000000000000100 + vec($_, 0, 8) = 64 == 64 00000010000000000000000000000000 + vec($_, 1, 8) = 64 == 16384 00000000000000100000000000000000 + vec($_, 2, 8) = 64 == 4194304 00000000000000000000001000000000 + vec($_, 3, 8) = 64 == 1073741824 00000000000000000000000000000010 + vec($_, 0, 
8) = 128 == 128 00000001000000000000000000000000 + vec($_, 1, 8) = 128 == 32768 00000000000000010000000000000000 + vec($_, 2, 8) = 128 == 8388608 00000000000000000000000100000000 + vec($_, 3, 8) = 128 == 2147483648 00000000000000000000000000000001 + =item wait Behaves like the wait(2) system call on your system: it waits for a child process to terminate and returns the pid of the deceased process, or -C<-1> if there are no child processes. The status is rketurned in C<$?>. +C<-1> if there are no child processes. The status is returned in C<$?>. Note that a return value of C<-1> could mean that child processes are being automatically reaped, as described in L<perlipc>. @@ -4764,8 +5690,8 @@ and for other examples. =item wantarray -Returns TRUE if the context of the currently executing subroutine is -looking for a list value. Returns FALSE if the context is looking +Returns true if the context of the currently executing subroutine is +looking for a list value. Returns false if the context is looking for a scalar. Returns the undefined value if the context is looking for no value (void context). @@ -4773,30 +5699,32 @@ for no value (void context). my @a = complex_calculation(); return wantarray ? @a : "@a"; +This function should have been named wantlist() instead. + =item warn LIST -Produces a message on STDERR just like C<die()>, but doesn't exit or throw +Produces a message on STDERR just like C<die>, but doesn't exit or throw an exception. If LIST is empty and C<$@> already contains a value (typically from a previous eval) that value is used after appending C<"\t...caught"> -to C<$@>. This is useful for staying almost, but not entirely similar to -C<die()>. +to C<$@>. This is useful for staying almost, but not entirely similar to +C<die>. If C<$@> is empty then the string C<"Warning: Something's wrong"> is used. No message is printed if there is a C<$SIG{__WARN__}> handler installed. 
It is the handler's responsibility to deal with the message -as it sees fit (like, for instance, converting it into a C<die()>). Most +as it sees fit (like, for instance, converting it into a C<die>). Most handlers must therefore make arrangements to actually display the -warnings that they are not prepared to deal with, by calling C<warn()> +warnings that they are not prepared to deal with, by calling C<warn> again in the handler. Note that this is quite safe and will not produce an endless loop, since C<__WARN__> hooks are not called from inside one. You will find this behavior is slightly different from that of C<$SIG{__DIE__}> handlers (which don't suppress the error text, but can -instead call C<die()> again to change it). +instead call C<die> again to change it). Using a C<__WARN__> handler provides a powerful way to silence all warnings (even the so-called mandatory ones). An example: @@ -4825,7 +5753,7 @@ carp() and cluck() functions. Writes a formatted record (possibly multi-line) to the specified FILEHANDLE, using the format associated with that file. By default the format for a file is the one having the same name as the filehandle, but the -format for the current output channel (see the C<select()> function) may be set +format for the current output channel (see the C<select> function) may be set explicitly by assigning the name of the format to the C<$~> variable. Top of form processing is handled automatically: if there is @@ -4840,11 +5768,11 @@ variable C<$->, which can be set to C<0> to force a new page. If FILEHANDLE is unspecified, output goes to the current default output channel, which starts out as STDOUT but may be changed by the -C<select()> operator. If the FILEHANDLE is an EXPR, then the expression +C<select> operator. If the FILEHANDLE is an EXPR, then the expression is evaluated and the resulting string is used to look up the name of the FILEHANDLE at run time. For more on formats, see L<perlform>. 
-Note that write is I<NOT> the opposite of C<read()>. Unfortunately. +Note that write is I<not> the opposite of C<read>. Unfortunately. =item y/// diff --git a/contrib/perl5/pod/perlguts.pod b/contrib/perl5/pod/perlguts.pod index 90bb716..2900b44 100644 --- a/contrib/perl5/pod/perlguts.pod +++ b/contrib/perl5/pod/perlguts.pod @@ -1,12 +1,13 @@ =head1 NAME -perlguts - Perl's Internal Functions +perlguts - Introduction to the Perl API =head1 DESCRIPTION -This document attempts to describe some of the internal functions of the -Perl executable. It is far from complete and probably contains many errors. -Please refer any questions or comments to the author below. +This document attempts to describe how to use the Perl API, as well as to provide +some info on the basic workings of the Perl core. It is far from complete +and probably contains many errors. Please refer any questions or +comments to the author below. =head1 Variables @@ -22,11 +23,13 @@ Each typedef has specific routines that manipulate the various data types. =head2 What is an "IV"? -Perl uses a special typedef IV which is a simple integer type that is +Perl uses a special typedef IV which is a simple signed integer type that is guaranteed to be large enough to hold a pointer (as well as an integer). +Additionally, there is the UV, which is simply an unsigned IV. Perl also uses two special typedefs, I32 and I16, which will always be at -least 32-bits and 16-bits long, respectively. +least 32-bits and 16-bits long, respectively. (Again, there are U32 and U16, +as well.) =head2 Working with SVs @@ -38,8 +41,8 @@ The six routines are: SV* newSViv(IV); SV* newSVnv(double); - SV* newSVpv(char*, int); - SV* newSVpvn(char*, int); + SV* newSVpv(const char*, int); + SV* newSVpvn(const char*, int); SV* newSVpvf(const char*, ...); SV* newSVsv(SV*); @@ -87,27 +90,31 @@ in an SV to a C function or system call.
To access the actual value that an SV points to, you can use the macros: SvIV(SV*) + SvUV(SV*) SvNV(SV*) SvPV(SV*, STRLEN len) + SvPV_nolen(SV*) -which will automatically coerce the actual scalar type into an IV, double, +which will automatically coerce the actual scalar type into an IV, UV, double, or string. In the C<SvPV> macro, the length of the string returned is placed into the -variable C<len> (this is a macro, so you do I<not> use C<&len>). If you do not -care what the length of the data is, use the global variable C<PL_na> or a -local variable of type C<STRLEN>. However using C<PL_na> can be quite -inefficient because C<PL_na> must be accessed in thread-local storage in -threaded Perl. In any case, remember that Perl allows arbitrary strings of -data that may both contain NULs and might not be terminated by a NUL. +variable C<len> (this is a macro, so you do I<not> use C<&len>). If you do +not care what the length of the data is, use the C<SvPV_nolen> macro. +Historically the C<SvPV> macro with the global variable C<PL_na> has been +used in this case. But that can be quite inefficient because C<PL_na> must +be accessed in thread-local storage in threaded Perl. In any case, remember +that Perl allows arbitrary strings of data that may both contain NULs and +might not be terminated by a NUL. Also remember that C doesn't allow you to safely say C<foo(SvPV(s, len), len);>. It might work with your compiler, but it won't work for everyone. Break this sort of statement up into separate assignments: + SV *s; STRLEN len; char * ptr; - ptr = SvPV(len); + ptr = SvPV(s, len); foo(ptr, len); If you want to know if the scalar value is TRUE, you can use: @@ -148,8 +155,8 @@ But note that these last three macros are valid only if C<SvPOK()> is true. 
If you want to append something to the end of string stored in an C<SV*>, you can use the following functions: - void sv_catpv(SV*, char*); - void sv_catpvn(SV*, char*, STRLEN); + void sv_catpv(SV*, const char*); + void sv_catpvn(SV*, const char*, STRLEN); void sv_catpvf(SV*, const char*, ...); void sv_catpvfn(SV*, const char*, STRLEN, va_list *, SV **, I32, bool); void sv_catsv(SV*, SV*); @@ -169,7 +176,7 @@ have "magic". See L<Magic Virtual Tables> later in this document. If you know the name of a scalar variable, you can get a pointer to its SV by using the following: - SV* perl_get_sv("package::varname", FALSE); + SV* get_sv("package::varname", FALSE); This returns NULL if the variable does not exist. @@ -280,7 +287,7 @@ then nothing is done. If you know the name of an array variable, you can get a pointer to its AV by using the following: - AV* perl_get_av("package::varname", FALSE); + AV* get_av("package::varname", FALSE); This returns NULL if the variable does not exist. @@ -295,8 +302,8 @@ To create an HV, you use the following routine: Once the HV has been created, the following operations are possible on HVs: - SV** hv_store(HV*, char* key, U32 klen, SV* val, U32 hash); - SV** hv_fetch(HV*, char* key, U32 klen, I32 lval); + SV** hv_store(HV*, const char* key, U32 klen, SV* val, U32 hash); + SV** hv_fetch(HV*, const char* key, U32 klen, I32 lval); The C<klen> parameter is the length of the key being passed in (Note that you cannot pass 0 in as a value of C<klen> to tell Perl to measure the @@ -314,8 +321,8 @@ not NULL before dereferencing it. These two functions check if a hash table entry exists, and deletes it. - bool hv_exists(HV*, char* key, U32 klen); - SV* hv_delete(HV*, char* key, U32 klen, I32 flags); + bool hv_exists(HV*, const char* key, U32 klen); + SV* hv_delete(HV*, const char* key, U32 klen, I32 flags); If C<flags> does not include the C<G_DISCARD> flag then C<hv_delete> will create and return a mortal copy of the deleted value. 
@@ -355,7 +362,7 @@ specified below. If you know the name of a hash variable, you can get a pointer to its HV by using the following: - HV* perl_get_hv("package::varname", FALSE); + HV* get_hv("package::varname", FALSE); This returns NULL if the variable does not exist. @@ -364,6 +371,10 @@ The hash algorithm is defined in the C<PERL_HASH(hash, key, klen)> macro: hash = 0; while (klen--) hash = (hash * 33) + *key++; + hash = hash + (hash >> 5); /* after 5.6 */ + +The last step was added in version 5.6 to improve distribution of +lower bits in the resulting hash value. See L<Understanding the Magic of Tied Hashes and Arrays> for more information on how to use the hash access functions on tied hashes. @@ -374,10 +385,10 @@ Beginning with version 5.004, the following functions are also supported: HE* hv_fetch_ent (HV* tb, SV* key, I32 lval, U32 hash); HE* hv_store_ent (HV* tb, SV* key, SV* val, U32 hash); - + bool hv_exists_ent (HV* tb, SV* key, U32 hash); SV* hv_delete_ent (HV* tb, SV* key, I32 flags, U32 hash); - + SV* hv_iterkeysv (HE* entry); Note that these functions take C<SV*> keys, which simplifies writing @@ -387,14 +398,13 @@ you to stringify the keys (unlike the previous set of functions). They also return and accept whole hash entries (C<HE*>), making their use more efficient (since the hash number for a particular string -doesn't have to be recomputed every time). See L<API LISTING> later in -this document for detailed descriptions. +doesn't have to be recomputed every time). See L<perlapi> for detailed +descriptions. The following macros must always be used to access the contents of hash entries. Note that the arguments to these macros must be simple variables, since they may get evaluated more than once. See -L<API LISTING> later in this document for detailed descriptions of these -macros. +L<perlapi> for detailed descriptions of these macros. HePV(HE* he, STRLEN len) HeVAL(HE* he) @@ -481,28 +491,28 @@ Upgrades rv to reference if not already one. 
Creates new SV for rv to point to. If C<classname> is non-null, the SV is blessed into the specified class. SV is returned. - SV* newSVrv(SV* rv, char* classname); + SV* newSVrv(SV* rv, const char* classname); Copies integer or double into an SV whose reference is C<rv>. SV is blessed if C<classname> is non-null. - SV* sv_setref_iv(SV* rv, char* classname, IV iv); - SV* sv_setref_nv(SV* rv, char* classname, NV iv); + SV* sv_setref_iv(SV* rv, const char* classname, IV iv); + SV* sv_setref_nv(SV* rv, const char* classname, NV iv); Copies the pointer value (I<the address, not the string!>) into an SV whose reference is rv. SV is blessed if C<classname> is non-null. - SV* sv_setref_pv(SV* rv, char* classname, PV iv); + SV* sv_setref_pv(SV* rv, const char* classname, PV iv); Copies string into an SV whose reference is C<rv>. Set length to 0 to let Perl calculate the string length. SV is blessed if C<classname> is non-null. - SV* sv_setref_pvn(SV* rv, char* classname, PV iv, STRLEN length); + SV* sv_setref_pvn(SV* rv, const char* classname, PV iv, STRLEN length); Tests whether the SV is blessed into the specified class. It does not check inheritance relationships. - int sv_isa(SV* sv, char* name); + int sv_isa(SV* sv, const char* name); Tests whether the SV is a reference to a blessed object. @@ -512,7 +522,7 @@ Tests whether the SV is derived from the specified class. SV can be either a reference to a blessed object or a string containing a class name. This is the function implementing the C<UNIVERSAL::isa> functionality. - bool sv_derived_from(SV* sv, char* name); + bool sv_derived_from(SV* sv, const char* name); To check if you've got an object derived from a specific class you have to write: @@ -524,9 +534,9 @@ to write: To create a new Perl variable with an undef value which can be accessed from your Perl script, use the following routines, depending on the variable type. 
- SV* perl_get_sv("package::varname", TRUE); - AV* perl_get_av("package::varname", TRUE); - HV* perl_get_hv("package::varname", TRUE); + SV* get_sv("package::varname", TRUE); + AV* get_av("package::varname", TRUE); + HV* get_hv("package::varname", TRUE); Notice the use of TRUE as the second parameter. The new variable can now be set, using the routines appropriate to the data type. @@ -635,7 +645,7 @@ in the stash "Baz::" in "Bar::"'s stash. To get the stash pointer for a particular package, use the function: - HV* gv_stashpv(char* name, I32 create) + HV* gv_stashpv(const char* name, I32 create) HV* gv_stashsv(SV*, I32 create) The first function takes a literal string, the second uses the string stored @@ -699,7 +709,7 @@ following code: extern int dberror; extern char *dberror_list; - SV* sv = perl_get_sv("dberror", TRUE); + SV* sv = get_sv("dberror", TRUE); sv_setiv(sv, (IV) dberror); sv_setpv(sv, dberror_list[dberror]); SvIOK_on(sv); @@ -733,7 +743,7 @@ Note this is current as of patchlevel 0, and could change at any time. Perl adds magic to an SV using the sv_magic function: - void sv_magic(SV* sv, SV* obj, int how, char* name, I32 namlen); + void sv_magic(SV* sv, SV* obj, int how, const char* name, I32 namlen); The C<sv> argument is a pointer to the SV that is to acquire a new magical feature. @@ -810,6 +820,8 @@ to an C<mg_type> of '\0') contains: Thus, when an SV is determined to be magical and of type '\0', if a get operation is being performed, the routine C<magic_get> is called. All the various routines for the various magical types begin with C<magic_>. +NOTE: the magic routines are not considered part of the Perl API, and may +not be exported by the Perl library. The current kinds of Magic Virtual Tables are: @@ -899,7 +911,7 @@ calling these functions, or by using one of the C<sv_set*_mg()> or C<sv_cat*_mg()> functions. 
Similarly, generic C code must call the C<SvGETMAGIC()> macro to invoke any 'get' magic if they use an SV obtained from external sources in functions that don't handle magic. -L<API LISTING> later in this document identifies such functions. +See L<perlapi> for a description of these functions. For example, calls to the C<sv_cat*()> functions typically need to be followed by C<SvSETMAGIC()>, but they don't need a prior C<SvGETMAGIC()> since their implementation handles 'get' magic. @@ -912,7 +924,7 @@ This routine returns a pointer to the C<MAGIC> structure stored in the SV. If the SV does not have that magical feature, C<NULL> is returned. Also, if the SV is not of type SVt_PVMG, Perl may core dump. - int mg_copy(SV* sv, SV* nsv, char* key, STRLEN klen); + int mg_copy(SV* sv, SV* nsv, const char* key, STRLEN klen); This routine checks to see what types of magic C<sv> has. If the mg_type field is an uppercase letter, then the mg_obj is copied to C<nsv>, but @@ -1092,10 +1104,15 @@ this: SAVEDELETE(PL_defstash, savepv(tmpbuf), strlen(tmpbuf)); -=item C<SAVEDESTRUCTOR(f,p)> +=item C<SAVEDESTRUCTOR(DESTRUCTORFUNC_NOCONTEXT_t f, void *p)> At the end of I<pseudo-block> the function C<f> is called with the -only argument (of type C<void*>) C<p>. +only argument C<p>. + +=item C<SAVEDESTRUCTOR_X(DESTRUCTORFUNC_t f, void *p)> + +At the end of I<pseudo-block> the function C<f> is called with the +implicit context argument (if any), and C<p>. =item C<SAVESTACK_POS()> @@ -1206,12 +1223,12 @@ For more information, consult L<perlxs> and L<perlxstut>. There are four routines that can be used to call a Perl subroutine from within a C program. 
These four are: - I32 perl_call_sv(SV*, I32); - I32 perl_call_pv(char*, I32); - I32 perl_call_method(char*, I32); - I32 perl_call_argv(char*, I32, register char**); + I32 call_sv(SV*, I32); + I32 call_pv(const char*, I32); + I32 call_method(const char*, I32); + I32 call_argv(const char*, I32, register char**); -The routine most often used is C<perl_call_sv>. The C<SV*> argument +The routine most often used is C<call_sv>. The C<SV*> argument contains either the name of the Perl subroutine to be called, or a reference to the subroutine. The second argument consists of flags that control the context in which the subroutine is called, whether @@ -1221,7 +1238,11 @@ trapped, and how to treat return values. All four routines return the number of arguments that the subroutine returned on the Perl stack. -When using any of these routines (except C<perl_call_argv>), the programmer +These routines used to be called C<perl_call_sv> etc., before Perl v5.6.0, +but those names are now deprecated; macros of the same name are provided for +compatibility. + +When using any of these routines (except C<call_argv>), the programmer must manipulate the Perl stack. These include the following macros and functions: @@ -1500,2093 +1521,272 @@ additional complications for conditionals). These optimizations are done in the subroutine peep(). Optimizations performed at this stage are subject to the same restrictions as in the pass 2. -=head1 API LISTING - -This is a listing of functions, macros, flags, and variables that may be -useful to extension writers or that may be found while reading other -extensions. - -Note that all Perl API global variables must be referenced with the C<PL_> -prefix. Some macros are provided for compatibility with the older, -unadorned names, but this support will be removed in a future release. - -It is strongly recommended that all Perl API functions that don't begin -with C<perl> be referenced with an explicit C<Perl_> prefix. 
- -The sort order of the listing is case insensitive, with any -occurrences of '_' ignored for the purpose of sorting. - -=over 8 - -=item av_clear - -Clears an array, making it empty. Does not free the memory used by the -array itself. - - void av_clear (AV* ar) - -=item av_extend - -Pre-extend an array. The C<key> is the index to which the array should be -extended. - - void av_extend (AV* ar, I32 key) - -=item av_fetch - -Returns the SV at the specified index in the array. The C<key> is the -index. If C<lval> is set then the fetch will be part of a store. Check -that the return value is non-null before dereferencing it to a C<SV*>. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied arrays. - - SV** av_fetch (AV* ar, I32 key, I32 lval) - -=item AvFILL - -Same as C<av_len()>. Deprecated, use C<av_len()> instead. - -=item av_len - -Returns the highest index in the array. Returns -1 if the array is empty. - - I32 av_len (AV* ar) - -=item av_make - -Creates a new AV and populates it with a list of SVs. The SVs are copied -into the array, so they may be freed after the call to av_make. The new AV -will have a reference count of 1. - - AV* av_make (I32 size, SV** svp) - -=item av_pop - -Pops an SV off the end of the array. Returns C<&PL_sv_undef> if the array is -empty. - - SV* av_pop (AV* ar) - -=item av_push - -Pushes an SV onto the end of the array. The array will grow automatically -to accommodate the addition. - - void av_push (AV* ar, SV* val) - -=item av_shift - -Shifts an SV off the beginning of the array. - - SV* av_shift (AV* ar) - -=item av_store - -Stores an SV in an array. The array index is specified as C<key>. The -return value will be NULL if the operation failed or if the value did not -need to be actually stored within the array (as in the case of tied arrays). -Otherwise it can be dereferenced to get the original C<SV*>. 
Note that the -caller is responsible for suitably incrementing the reference count of C<val> -before the call, and decrementing it if the function returned NULL. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied arrays. - - SV** av_store (AV* ar, I32 key, SV* val) - -=item av_undef - -Undefines the array. Frees the memory used by the array itself. - - void av_undef (AV* ar) - -=item av_unshift - -Unshift the given number of C<undef> values onto the beginning of the -array. The array will grow automatically to accommodate the addition. -You must then use C<av_store> to assign values to these new elements. - - void av_unshift (AV* ar, I32 num) - -=item CLASS - -Variable which is setup by C<xsubpp> to indicate the class name for a C++ XS -constructor. This is always a C<char*>. See C<THIS> and -L<perlxs/"Using XS With C++">. - -=item Copy - -The XSUB-writer's interface to the C C<memcpy> function. The C<s> is the -source, C<d> is the destination, C<n> is the number of items, and C<t> is -the type. May fail on overlapping copies. See also C<Move>. - - void Copy( s, d, n, t ) - -=item croak - -This is the XSUB-writer's interface to Perl's C<die> function. Use this -function the same way you use the C C<printf> function. See C<warn>. - -=item CvSTASH - -Returns the stash of the CV. - - HV* CvSTASH( SV* sv ) - -=item PL_DBsingle - -When Perl is run in debugging mode, with the B<-d> switch, this SV is a -boolean which indicates whether subs are being single-stepped. -Single-stepping is automatically turned on after every step. This is the C -variable which corresponds to Perl's $DB::single variable. See C<PL_DBsub>. - -=item PL_DBsub - -When Perl is run in debugging mode, with the B<-d> switch, this GV contains -the SV which holds the name of the sub being debugged. This is the C -variable which corresponds to Perl's $DB::sub variable. See C<PL_DBsingle>. 
-The sub name can be found by - - SvPV( GvSV( PL_DBsub ), len ) - -=item PL_DBtrace - -Trace variable used when Perl is run in debugging mode, with the B<-d> -switch. This is the C variable which corresponds to Perl's $DB::trace -variable. See C<PL_DBsingle>. - -=item dMARK - -Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and -C<dORIGMARK>. - -=item dORIGMARK - -Saves the original stack mark for the XSUB. See C<ORIGMARK>. - -=item PL_dowarn - -The C variable which corresponds to Perl's $^W warning variable. - -=item dSP - -Declares a local copy of perl's stack pointer for the XSUB, available via -the C<SP> macro. See C<SP>. - -=item dXSARGS - -Sets up stack and mark pointers for an XSUB, calling dSP and dMARK. This is -usually handled automatically by C<xsubpp>. Declares the C<items> variable -to indicate the number of items on the stack. - -=item dXSI32 - -Sets up the C<ix> variable for an XSUB which has aliases. This is usually -handled automatically by C<xsubpp>. - -=item do_binmode - -Switches filehandle to binmode. C<iotype> is what C<IoTYPE(io)> would -contain. - - do_binmode(fp, iotype, TRUE); - -=item ENTER - -Opening bracket on a callback. See C<LEAVE> and L<perlcall>. - - ENTER; - -=item EXTEND - -Used to extend the argument stack for an XSUB's return values. - - EXTEND( sp, int x ) - -=item fbm_compile - -Analyses the string in order to make fast searches on it using fbm_instr() -- -the Boyer-Moore algorithm. - - void fbm_compile(SV* sv, U32 flags) - -=item fbm_instr - -Returns the location of the SV in the string delimited by C<str> and -C<strend>. It returns C<Nullch> if the string can't be found. The -C<sv> does not have to be fbm_compiled, but the search will not be as -fast then. - - char* fbm_instr(char *str, char *strend, SV *sv, U32 flags) - -=item FREETMPS - -Closing bracket for temporaries on a callback. See C<SAVETMPS> and -L<perlcall>. - - FREETMPS; - -=item G_ARRAY - -Used to indicate array context. 
See C<GIMME_V>, C<GIMME> and L<perlcall>. - -=item G_DISCARD - -Indicates that arguments returned from a callback should be discarded. See -L<perlcall>. - -=item G_EVAL - -Used to force a Perl C<eval> wrapper around a callback. See L<perlcall>. - -=item GIMME - -A backward-compatible version of C<GIMME_V> which can only return -C<G_SCALAR> or C<G_ARRAY>; in a void context, it returns C<G_SCALAR>. - -=item GIMME_V - -The XSUB-writer's equivalent to Perl's C<wantarray>. Returns -C<G_VOID>, C<G_SCALAR> or C<G_ARRAY> for void, scalar or array -context, respectively. - -=item G_NOARGS - -Indicates that no arguments are being sent to a callback. See L<perlcall>. - -=item G_SCALAR - -Used to indicate scalar context. See C<GIMME_V>, C<GIMME>, and L<perlcall>. - -=item gv_fetchmeth - -Returns the glob with the given C<name> and a defined subroutine or -C<NULL>. The glob lives in the given C<stash>, or in the stashes -accessible via @ISA and @UNIVERSAL. - -The argument C<level> should be either 0 or -1. If C<level==0>, as a -side-effect creates a glob with the given C<name> in the given -C<stash> which in the case of success contains an alias for the -subroutine, and sets up caching info for this glob. Similarly for all -the searched stashes. - -This function grants C<"SUPER"> token as a postfix of the stash name. - -The GV returned from C<gv_fetchmeth> may be a method cache entry, -which is not visible to Perl code. So when calling C<perl_call_sv>, -you should not use the GV directly; instead, you should use the -method's CV, which can be obtained from the GV with the C<GvCV> macro. - - GV* gv_fetchmeth (HV* stash, char* name, STRLEN len, I32 level) - -=item gv_fetchmethod - -=item gv_fetchmethod_autoload - -Returns the glob which contains the subroutine to call to invoke the -method on the C<stash>. In fact in the presence of autoloading this may -be the glob for "AUTOLOAD". In this case the corresponding variable -$AUTOLOAD is already setup. 
- -The third parameter of C<gv_fetchmethod_autoload> determines whether AUTOLOAD -lookup is performed if the given method is not present: non-zero means -yes, look for AUTOLOAD; zero means no, don't look for AUTOLOAD. Calling -C<gv_fetchmethod> is equivalent to calling C<gv_fetchmethod_autoload> with a -non-zero C<autoload> parameter. - -These functions grant C<"SUPER"> token as a prefix of the method name. - -Note that if you want to keep the returned glob for a long time, you -need to check for it being "AUTOLOAD", since at the later time the call -may load a different subroutine due to $AUTOLOAD changing its value. -Use the glob created via a side effect to do this. - -These functions have the same side-effects and as C<gv_fetchmeth> with -C<level==0>. C<name> should be writable if contains C<':'> or C<'\''>. -The warning against passing the GV returned by C<gv_fetchmeth> to -C<perl_call_sv> apply equally to these functions. - - GV* gv_fetchmethod (HV* stash, char* name) - GV* gv_fetchmethod_autoload (HV* stash, char* name, I32 autoload) - -=item G_VOID - -Used to indicate void context. See C<GIMME_V> and L<perlcall>. - -=item gv_stashpv - -Returns a pointer to the stash for a specified package. If C<create> is set -then the package will be created if it does not already exist. If C<create> -is not set and the package does not exist then NULL is returned. - - HV* gv_stashpv (char* name, I32 create) - -=item gv_stashsv - -Returns a pointer to the stash for a specified package. See C<gv_stashpv>. - - HV* gv_stashsv (SV* sv, I32 create) - -=item GvSV - -Return the SV from the GV. - -=item HEf_SVKEY - -This flag, used in the length slot of hash entries and magic -structures, specifies the structure contains a C<SV*> pointer where a -C<char*> pointer is to be expected. (For information only--not to be used). - -=item HeHASH - -Returns the computed hash stored in the hash entry. 
- - U32 HeHASH(HE* he) - -=item HeKEY - -Returns the actual pointer stored in the key slot of the hash entry. -The pointer may be either C<char*> or C<SV*>, depending on the value of -C<HeKLEN()>. Can be assigned to. The C<HePV()> or C<HeSVKEY()> macros -are usually preferable for finding the value of a key. - - char* HeKEY(HE* he) - -=item HeKLEN - -If this is negative, and amounts to C<HEf_SVKEY>, it indicates the entry -holds an C<SV*> key. Otherwise, holds the actual length of the key. -Can be assigned to. The C<HePV()> macro is usually preferable for finding -key lengths. - - int HeKLEN(HE* he) - -=item HePV - -Returns the key slot of the hash entry as a C<char*> value, doing any -necessary dereferencing of possibly C<SV*> keys. The length of -the string is placed in C<len> (this is a macro, so do I<not> use -C<&len>). If you do not care about what the length of the key is, -you may use the global variable C<PL_na>, though this is rather less -efficient than using a local variable. Remember though, that hash -keys in perl are free to contain embedded nulls, so using C<strlen()> -or similar is not a good way to find the length of hash keys. -This is very similar to the C<SvPV()> macro described elsewhere in -this document. - - char* HePV(HE* he, STRLEN len) - -=item HeSVKEY - -Returns the key as an C<SV*>, or C<Nullsv> if the hash entry -does not contain an C<SV*> key. - - HeSVKEY(HE* he) - -=item HeSVKEY_force - -Returns the key as an C<SV*>. Will create and return a temporary -mortal C<SV*> if the hash entry contains only a C<char*> key. - - HeSVKEY_force(HE* he) - -=item HeSVKEY_set - -Sets the key to a given C<SV*>, taking care to set the appropriate flags -to indicate the presence of an C<SV*> key, and returns the same C<SV*>. - - HeSVKEY_set(HE* he, SV* sv) - -=item HeVAL - -Returns the value slot (type C<SV*>) stored in the hash entry. - - HeVAL(HE* he) - -=item hv_clear - -Clears a hash, making it empty. 
- - void hv_clear (HV* tb) - -=item hv_delete - -Deletes a key/value pair in the hash. The value SV is removed from the hash -and returned to the caller. The C<klen> is the length of the key. The -C<flags> value will normally be zero; if set to G_DISCARD then NULL will be -returned. - - SV* hv_delete (HV* tb, char* key, U32 klen, I32 flags) - -=item hv_delete_ent - -Deletes a key/value pair in the hash. The value SV is removed from the hash -and returned to the caller. The C<flags> value will normally be zero; if set -to G_DISCARD then NULL will be returned. C<hash> can be a valid precomputed -hash value, or 0 to ask for it to be computed. - - SV* hv_delete_ent (HV* tb, SV* key, I32 flags, U32 hash) - -=item hv_exists - -Returns a boolean indicating whether the specified hash key exists. The -C<klen> is the length of the key. - - bool hv_exists (HV* tb, char* key, U32 klen) - -=item hv_exists_ent - -Returns a boolean indicating whether the specified hash key exists. C<hash> -can be a valid precomputed hash value, or 0 to ask for it to be computed. - - bool hv_exists_ent (HV* tb, SV* key, U32 hash) - -=item hv_fetch - -Returns the SV which corresponds to the specified key in the hash. The -C<klen> is the length of the key. If C<lval> is set then the fetch will be -part of a store. Check that the return value is non-null before -dereferencing it to a C<SV*>. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied hashes. - - SV** hv_fetch (HV* tb, char* key, U32 klen, I32 lval) - -=item hv_fetch_ent - -Returns the hash entry which corresponds to the specified key in the hash. -C<hash> must be a valid precomputed hash number for the given C<key>, or -0 if you want the function to compute it. IF C<lval> is set then the -fetch will be part of a store. Make sure the return value is non-null -before accessing it. 
The return value when C<tb> is a tied hash -is a pointer to a static location, so be sure to make a copy of the -structure if you need to store it somewhere. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied hashes. - - HE* hv_fetch_ent (HV* tb, SV* key, I32 lval, U32 hash) - -=item hv_iterinit - -Prepares a starting point to traverse a hash table. - - I32 hv_iterinit (HV* tb) - -Returns the number of keys in the hash (i.e. the same as C<HvKEYS(tb)>). -The return value is currently only meaningful for hashes without tie -magic. - -NOTE: Before version 5.004_65, C<hv_iterinit> used to return the number -of hash buckets that happen to be in use. If you still need that -esoteric value, you can get it through the macro C<HvFILL(tb)>. - -=item hv_iterkey - -Returns the key from the current position of the hash iterator. See -C<hv_iterinit>. - - char* hv_iterkey (HE* entry, I32* retlen) - -=item hv_iterkeysv - -Returns the key as an C<SV*> from the current position of the hash -iterator. The return value will always be a mortal copy of the -key. Also see C<hv_iterinit>. - - SV* hv_iterkeysv (HE* entry) - -=item hv_iternext - -Returns entries from a hash iterator. See C<hv_iterinit>. - - HE* hv_iternext (HV* tb) - -=item hv_iternextsv - -Performs an C<hv_iternext>, C<hv_iterkey>, and C<hv_iterval> in one -operation. - - SV* hv_iternextsv (HV* hv, char** key, I32* retlen) - -=item hv_iterval - -Returns the value from the current position of the hash iterator. See -C<hv_iterkey>. - - SV* hv_iterval (HV* tb, HE* entry) - -=item hv_magic - -Adds magic to a hash. See C<sv_magic>. - - void hv_magic (HV* hv, GV* gv, int how) - -=item HvNAME - -Returns the package name of a stash. See C<SvSTASH>, C<CvSTASH>. - - char* HvNAME (HV* stash) - -=item hv_store - -Stores an SV in a hash. The hash key is specified as C<key> and C<klen> is -the length of the key. 
The C<hash> parameter is the precomputed hash -value; if it is zero then Perl will compute it. The return value will be -NULL if the operation failed or if the value did not need to be actually -stored within the hash (as in the case of tied hashes). Otherwise it can -be dereferenced to get the original C<SV*>. Note that the caller is -responsible for suitably incrementing the reference count of C<val> -before the call, and decrementing it if the function returned NULL. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied hashes. - - SV** hv_store (HV* tb, char* key, U32 klen, SV* val, U32 hash) - -=item hv_store_ent - -Stores C<val> in a hash. The hash key is specified as C<key>. The C<hash> -parameter is the precomputed hash value; if it is zero then Perl will -compute it. The return value is the new hash entry so created. It will be -NULL if the operation failed or if the value did not need to be actually -stored within the hash (as in the case of tied hashes). Otherwise the -contents of the return value can be accessed using the C<He???> macros -described here. Note that the caller is responsible for suitably -incrementing the reference count of C<val> before the call, and decrementing -it if the function returned NULL. - -See L<Understanding the Magic of Tied Hashes and Arrays> for more -information on how to use this function on tied hashes. - - HE* hv_store_ent (HV* tb, SV* key, SV* val, U32 hash) - -=item hv_undef - -Undefines the hash. - - void hv_undef (HV* tb) - -=item isALNUM - -Returns a boolean indicating whether the C C<char> is an ascii alphanumeric -character or digit. - - int isALNUM (char c) - -=item isALPHA - -Returns a boolean indicating whether the C C<char> is an ascii alphabetic -character. - - int isALPHA (char c) - -=item isDIGIT - -Returns a boolean indicating whether the C C<char> is an ascii digit. 
- - int isDIGIT (char c) - -=item isLOWER - -Returns a boolean indicating whether the C C<char> is a lowercase character. - - int isLOWER (char c) - -=item isSPACE - -Returns a boolean indicating whether the C C<char> is whitespace. - - int isSPACE (char c) - -=item isUPPER - -Returns a boolean indicating whether the C C<char> is an uppercase character. - - int isUPPER (char c) - -=item items - -Variable which is setup by C<xsubpp> to indicate the number of items on the -stack. See L<perlxs/"Variable-length Parameter Lists">. - -=item ix - -Variable which is setup by C<xsubpp> to indicate which of an XSUB's aliases -was used to invoke it. See L<perlxs/"The ALIAS: Keyword">. - -=item LEAVE - -Closing bracket on a callback. See C<ENTER> and L<perlcall>. - - LEAVE; - -=item looks_like_number - -Test if an the content of an SV looks like a number (or is a number). - - int looks_like_number(SV*) - - -=item MARK - -Stack marker variable for the XSUB. See C<dMARK>. - -=item mg_clear - -Clear something magical that the SV represents. See C<sv_magic>. - - int mg_clear (SV* sv) - -=item mg_copy - -Copies the magic from one SV to another. See C<sv_magic>. - - int mg_copy (SV *, SV *, char *, STRLEN) - -=item mg_find - -Finds the magic pointer for type matching the SV. See C<sv_magic>. - - MAGIC* mg_find (SV* sv, int type) - -=item mg_free - -Free any magic storage used by the SV. See C<sv_magic>. - - int mg_free (SV* sv) - -=item mg_get - -Do magic after a value is retrieved from the SV. See C<sv_magic>. - - int mg_get (SV* sv) - -=item mg_len - -Report on the SV's length. See C<sv_magic>. - - U32 mg_len (SV* sv) - -=item mg_magical - -Turns on the magical status of an SV. See C<sv_magic>. - - void mg_magical (SV* sv) - -=item mg_set - -Do magic after a value is assigned to the SV. See C<sv_magic>. 
- - int mg_set (SV* sv) - -=item modglobal - -C<modglobal> is a general purpose, interpreter global HV for use by -extensions that need to keep information on a per-interpreter basis. -In a pinch, it can also be used as a symbol table for extensions -to share data among each other. It is a good idea to use keys -prefixed by the package name of the extension that owns the data. - -=item Move - -The XSUB-writer's interface to the C C<memmove> function. The C<s> is the -source, C<d> is the destination, C<n> is the number of items, and C<t> is -the type. Can do overlapping moves. See also C<Copy>. - - void Move( s, d, n, t ) - -=item PL_na - -A convenience variable which is typically used with C<SvPV> when one doesn't -care about the length of the string. It is usually more efficient to -declare a local variable and use that instead. - -=item New - -The XSUB-writer's interface to the C C<malloc> function. - - void* New( x, void *ptr, int size, type ) - -=item newAV - -Creates a new AV. The reference count is set to 1. - - AV* newAV (void) - -=item Newc - -The XSUB-writer's interface to the C C<malloc> function, with cast. - - void* Newc( x, void *ptr, int size, type, cast ) - -=item newCONSTSUB - -Creates a constant sub equivalent to Perl C<sub FOO () { 123 }> -which is eligible for inlining at compile-time. - - void newCONSTSUB(HV* stash, char* name, SV* sv) - -=item newHV - -Creates a new HV. The reference count is set to 1. - - HV* newHV (void) - -=item newRV_inc - -Creates an RV wrapper for an SV. The reference count for the original SV is -incremented. - - SV* newRV_inc (SV* ref) - -For historical reasons, "newRV" is a synonym for "newRV_inc". - -=item newRV_noinc - -Creates an RV wrapper for an SV. The reference count for the original -SV is B<not> incremented. - - SV* newRV_noinc (SV* ref) - -=item NEWSV - -Creates a new SV. A non-zero C<len> parameter indicates the number of -bytes of preallocated string space the SV should have. 
An extra byte -for a tailing NUL is also reserved. (SvPOK is not set for the SV even -if string space is allocated.) The reference count for the new SV is -set to 1. C<id> is an integer id between 0 and 1299 (used to identify -leaks). - - SV* NEWSV (int id, STRLEN len) - -=item newSViv - -Creates a new SV and copies an integer into it. The reference count for the -SV is set to 1. - - SV* newSViv (IV i) - -=item newSVnv - -Creates a new SV and copies a double into it. The reference count for the -SV is set to 1. - - SV* newSVnv (NV i) - -=item newSVpv - -Creates a new SV and copies a string into it. The reference count for the -SV is set to 1. If C<len> is zero then Perl will compute the length. - - SV* newSVpv (char* s, STRLEN len) - -=item newSVpvf - -Creates a new SV an initialize it with the string formatted like -C<sprintf>. - - SV* newSVpvf(const char* pat, ...); - -=item newSVpvn - -Creates a new SV and copies a string into it. The reference count for the -SV is set to 1. If C<len> is zero then Perl will create a zero length -string. - - SV* newSVpvn (char* s, STRLEN len) - -=item newSVrv - -Creates a new SV for the RV, C<rv>, to point to. If C<rv> is not an RV then -it will be upgraded to one. If C<classname> is non-null then the new SV will -be blessed in the specified package. The new SV is returned and its -reference count is 1. - - SV* newSVrv (SV* rv, char* classname) - -=item newSVsv - -Creates a new SV which is an exact duplicate of the original SV. - - SV* newSVsv (SV* old) - -=item newXS - -Used by C<xsubpp> to hook up XSUBs as Perl subs. - -=item newXSproto - -Used by C<xsubpp> to hook up XSUBs as Perl subs. Adds Perl prototypes to -the subs. - -=item Newz - -The XSUB-writer's interface to the C C<malloc> function. The allocated -memory is zeroed with C<memzero>. - - void* Newz( x, void *ptr, int size, type ) - -=item Nullav - -Null AV pointer. - -=item Nullch - -Null character pointer. - -=item Nullcv - -Null CV pointer. 
- -=item Nullhv - -Null HV pointer. - -=item Nullsv - -Null SV pointer. - -=item ORIGMARK - -The original stack mark for the XSUB. See C<dORIGMARK>. - -=item perl_alloc - -Allocates a new Perl interpreter. See L<perlembed>. - -=item perl_call_argv - -Performs a callback to the specified Perl sub. See L<perlcall>. - - I32 perl_call_argv (char* subname, I32 flags, char** argv) - -=item perl_call_method - -Performs a callback to the specified Perl method. The blessed object must -be on the stack. See L<perlcall>. - - I32 perl_call_method (char* methname, I32 flags) - -=item perl_call_pv - -Performs a callback to the specified Perl sub. See L<perlcall>. - - I32 perl_call_pv (char* subname, I32 flags) - -=item perl_call_sv - -Performs a callback to the Perl sub whose name is in the SV. See -L<perlcall>. - - I32 perl_call_sv (SV* sv, I32 flags) - -=item perl_construct - -Initializes a new Perl interpreter. See L<perlembed>. - -=item perl_destruct - -Shuts down a Perl interpreter. See L<perlembed>. - -=item perl_eval_sv - -Tells Perl to C<eval> the string in the SV. - - I32 perl_eval_sv (SV* sv, I32 flags) - -=item perl_eval_pv - -Tells Perl to C<eval> the given string and return an SV* result. - - SV* perl_eval_pv (char* p, I32 croak_on_error) - -=item perl_free - -Releases a Perl interpreter. See L<perlembed>. - -=item perl_get_av - -Returns the AV of the specified Perl array. If C<create> is set and the -Perl variable does not exist then it will be created. If C<create> is not -set and the variable does not exist then NULL is returned. - - AV* perl_get_av (char* name, I32 create) - -=item perl_get_cv - -Returns the CV of the specified Perl sub. If C<create> is set and the Perl -variable does not exist then it will be created. If C<create> is not -set and the variable does not exist then NULL is returned. - - CV* perl_get_cv (char* name, I32 create) - -=item perl_get_hv - -Returns the HV of the specified Perl hash. 
If C<create> is set and the Perl -variable does not exist then it will be created. If C<create> is not -set and the variable does not exist then NULL is returned. - - HV* perl_get_hv (char* name, I32 create) - -=item perl_get_sv - -Returns the SV of the specified Perl scalar. If C<create> is set and the -Perl variable does not exist then it will be created. If C<create> is not -set and the variable does not exist then NULL is returned. - - SV* perl_get_sv (char* name, I32 create) - -=item perl_parse - -Tells a Perl interpreter to parse a Perl script. See L<perlembed>. - -=item perl_require_pv - -Tells Perl to C<require> a module. - - void perl_require_pv (char* pv) - -=item perl_run - -Tells a Perl interpreter to run. See L<perlembed>. - -=item POPi - -Pops an integer off the stack. - - int POPi() - -=item POPl - -Pops a long off the stack. - - long POPl() - -=item POPp - -Pops a string off the stack. - - char* POPp() - -=item POPn - -Pops a double off the stack. - - double POPn() - -=item POPs - -Pops an SV off the stack. - - SV* POPs() - -=item PUSHMARK - -Opening bracket for arguments on a callback. See C<PUTBACK> and L<perlcall>. - - PUSHMARK(p) - -=item PUSHi - -Push an integer onto the stack. The stack must have room for this element. -Handles 'set' magic. See C<XPUSHi>. - - void PUSHi(int d) - -=item PUSHn - -Push a double onto the stack. The stack must have room for this element. -Handles 'set' magic. See C<XPUSHn>. - - void PUSHn(double d) - -=item PUSHp - -Push a string onto the stack. The stack must have room for this element. -The C<len> indicates the length of the string. Handles 'set' magic. See -C<XPUSHp>. - - void PUSHp(char *c, int len ) - -=item PUSHs - -Push an SV onto the stack. The stack must have room for this element. Does -not handle 'set' magic. See C<XPUSHs>. - - void PUSHs(sv) - -=item PUSHu - -Push an unsigned integer onto the stack. The stack must have room for -this element. See C<XPUSHu>. 
- - void PUSHu(unsigned int d) - - -=item PUTBACK - -Closing bracket for XSUB arguments. This is usually handled by C<xsubpp>. -See C<PUSHMARK> and L<perlcall> for other uses. - - PUTBACK; - -=item Renew - -The XSUB-writer's interface to the C C<realloc> function. +=head1 How multiple interpreters and concurrency are supported + +WARNING: This information is subject to radical changes prior to +the Perl 5.6 release. Use with caution. + +=head2 Background and PERL_IMPLICIT_CONTEXT + +The Perl interpreter can be regarded as a closed box: it has an API +for feeding it code or otherwise making it do things, but it also has +functions for its own use. This smells a lot like an object, and +there are ways for you to build Perl so that you can have multiple +interpreters, with one interpreter represented either as a C++ object, +a C structure, or inside a thread. The thread, the C structure, or +the C++ object will contain all the context, the state of that +interpreter. + +Three macros control the major Perl build flavors: MULTIPLICITY, +USE_THREADS and PERL_OBJECT. The MULTIPLICITY build has a C structure +that packages all the interpreter state, there is a similar thread-specific +data structure under USE_THREADS, and the PERL_OBJECT build has a C++ +class to maintain interpreter state. In all three cases, +PERL_IMPLICIT_CONTEXT is also normally defined, and enables the +support for passing in a "hidden" first argument that represents all three +data structures. + +All this obviously requires a way for the Perl internal functions to be +C++ methods, subroutines taking some kind of structure as the first +argument, or subroutines taking nothing as the first argument. To +enable these three very different ways of building the interpreter, +the Perl source (as it does in so many other situations) makes heavy +use of macros and subroutine naming conventions. + +First problem: deciding which functions will be public API functions and +which will be private. 
All functions whose names begin C<S_> are private +(think "S" for "secret" or "static"). All other functions begin with +"Perl_", but just because a function begins with "Perl_" does not mean it is +part of the API. The easiest way to be B<sure> a function is part of the API +is to find its entry in L<perlapi>. If it exists in L<perlapi>, it's part +of the API. If it doesn't, and you think it should be (i.e., you need it for +your extension), send mail via L<perlbug> explaining why you think it +should be. + +(L<perlapi> itself is generated by embed.pl, a Perl script that generates +significant portions of the Perl source code. It has a list of almost +all the functions defined by the Perl interpreter along with their calling +characteristics and some flags. Functions that are part of the public API +are marked with an 'A' in their flags.) + +Second problem: there must be a syntax so that the same subroutine +declarations and calls can pass a structure as their first argument, +or pass nothing. To solve this, the subroutines are named and +declared in a particular way. Here's a typical start of a static +function used within the Perl guts: + + STATIC void + S_incline(pTHX_ char *s) + +STATIC becomes "static" in C, and is #define'd to nothing in C++. + +A public function (i.e. part of the internal API, but not necessarily +sanctioned for use in extensions) begins like this: + + void + Perl_sv_setsv(pTHX_ SV* dsv, SV* ssv) + +C<pTHX_> is one of a number of macros (in perl.h) that hide the +details of the interpreter's context. THX stands for "thread", "this", +or "thingy", as the case may be. (And no, George Lucas is not involved. :-) +The first character could be 'p' for a B<p>rototype, 'a' for B<a>rgument, +or 'd' for B<d>eclaration. + +When Perl is built without PERL_IMPLICIT_CONTEXT, there is no first +argument containing the interpreter's context.
The trailing underscore +in the pTHX_ macro indicates that the macro expansion needs a comma +after the context argument because other arguments follow it. If +PERL_IMPLICIT_CONTEXT is not defined, pTHX_ will be ignored, and the +subroutine is not prototyped to take the extra argument. The form of the +macro without the trailing underscore is used when there are no additional +explicit arguments. + +When a core function calls another, it must pass the context. This +is normally hidden via macros. Consider C<sv_setsv>. It expands +to something like this: + + #ifdef PERL_IMPLICIT_CONTEXT + #define sv_setsv(a,b) Perl_sv_setsv(aTHX_ a, b) + /* can't do this for vararg functions, see below */ + #else + #define sv_setsv Perl_sv_setsv + #endif + +This works well, and means that XS authors can gleefully write: + + sv_setsv(foo, bar); + +and still have it work under all the modes Perl could have been +compiled with. + +Under PERL_OBJECT in the core, that will translate to either: + + CPerlObj::Perl_sv_setsv(foo,bar); # in CPerlObj functions, + # C++ takes care of 'this' + or + + pPerl->Perl_sv_setsv(foo,bar); # in truly static functions, + # see objXSUB.h + +Under PERL_OBJECT in extensions (aka PERL_CAPI), or under +MULTIPLICITY/USE_THREADS w/ PERL_IMPLICIT_CONTEXT in both core +and extensions, it will be: + + Perl_sv_setsv(aTHX_ foo, bar); # the canonical Perl "API" + # for all build flavors + +This doesn't work so cleanly for varargs functions, though, as macros +imply that the number of arguments is known in advance. Instead we +either need to spell them out fully, passing C<aTHX_> as the first +argument (the Perl core tends to do this with functions like +Perl_warner), or use a context-free version. + +The context-free version of Perl_warner is called +Perl_warner_nocontext, and does not take the extra argument. Instead +it does dTHX; to get the context from thread-local storage.
We +C<#define warner Perl_warner_nocontext> so that extensions get source +compatibility at the expense of performance. (Passing an arg is +cheaper than grabbing it from thread-local storage.) + +You can ignore [pad]THX[xo] when browsing the Perl headers/sources. +Those are strictly for use within the core. Extensions and embedders +need only be aware of [pad]THX. - void* Renew( void *ptr, int size, type ) +=head2 How do I use all this in extensions? + +When Perl is built with PERL_IMPLICIT_CONTEXT, extensions that call +any functions in the Perl API will need to pass the initial context +argument somehow. The kicker is that you will need to write it in +such a way that the extension still compiles when Perl hasn't been +built with PERL_IMPLICIT_CONTEXT enabled. -=item Renewc +There are three ways to do this. First, the easy but inefficient way, +which is also the default, in order to maintain source compatibility +with extensions: whenever XSUB.h is #included, it redefines the aTHX +and aTHX_ macros to call a function that will return the context. +Thus, something like: -The XSUB-writer's interface to the C C<realloc> function, with cast. + sv_setsv(asv, bsv); - void* Renewc( void *ptr, int size, type, cast ) +in your extension will translate to this when PERL_IMPLICIT_CONTEXT is +in effect: -=item RETVAL + Perl_sv_setsv(Perl_get_context(), asv, bsv); -Variable which is setup by C<xsubpp> to hold the return value for an XSUB. -This is always the proper type for the XSUB. -See L<perlxs/"The RETVAL Variable">. +or to this otherwise: -=item safefree + Perl_sv_setsv(asv, bsv); -The XSUB-writer's interface to the C C<free> function. +You have to do nothing new in your extension to get this; since +the Perl library provides Perl_get_context(), it will all just +work. 
+ +The second, more efficient way is to use the following template for +your Foo.xs: -=item safemalloc + #define PERL_NO_GET_CONTEXT /* we want efficiency */ + #include "EXTERN.h" + #include "perl.h" + #include "XSUB.h" -The XSUB-writer's interface to the C C<malloc> function. + static my_private_function(int arg1, int arg2); -=item saferealloc + static SV * + my_private_function(int arg1, int arg2) + { + dTHX; /* fetch context */ + ... call many Perl API functions ... + } -The XSUB-writer's interface to the C C<realloc> function. + [... etc ...] -=item savepv + MODULE = Foo PACKAGE = Foo -Copy a string to a safe spot. This does not use an SV. + /* typical XSUB */ - char* savepv (char* sv) + void + my_xsub(arg) + int arg + CODE: + my_private_function(arg, 10); -=item savepvn +Note that the only two changes from the normal way of writing an +extension are the addition of a C<#define PERL_NO_GET_CONTEXT> before +including the Perl headers, followed by a C<dTHX;> declaration at +the start of every function that will call the Perl API. (You'll +know which functions need this, because the C compiler will complain +that there's an undeclared identifier in those functions.) No changes +are needed for the XSUBs themselves, because the XS() macro is +correctly defined to pass in the implicit context if needed. -Copy a string to a safe spot. The C<len> indicates number of bytes to -copy. This does not use an SV. +The third, even more efficient way is to ape how it is done within +the Perl guts: - char* savepvn (char* sv, I32 len) -=item SAVETMPS + #define PERL_NO_GET_CONTEXT /* we want efficiency */ + #include "EXTERN.h" + #include "perl.h" + #include "XSUB.h" -Opening bracket for temporaries on a callback. See C<FREETMPS> and -L<perlcall>. 
+ /* pTHX_ only needed for functions that call Perl API */ + static my_private_function(pTHX_ int arg1, int arg2); - SAVETMPS; + static SV * + my_private_function(pTHX_ int arg1, int arg2) + { + /* dTHX; not needed here, because THX is an argument */ + ... call Perl API functions ... + } -=item SP + [... etc ...] -Stack pointer. This is usually handled by C<xsubpp>. See C<dSP> and -C<SPAGAIN>. + MODULE = Foo PACKAGE = Foo -=item SPAGAIN + /* typical XSUB */ -Refetch the stack pointer. Used after a callback. See L<perlcall>. + void + my_xsub(arg) + int arg + CODE: + my_private_function(aTHX_ arg, 10); - SPAGAIN; +This implementation never has to fetch the context using a function +call, since it is always passed as an extra argument. Depending on +your needs for simplicity or efficiency, you may mix the previous +two approaches freely. -=item ST +Never add a comma after C<pTHX> yourself--always use the form of the +macro with the underscore for functions that take explicit arguments, +or the form without the underscore for functions with no explicit arguments. -Used to access elements on the XSUB's stack. +=head2 Future Plans and PERL_IMPLICIT_SYS - SV* ST(int x) +Just as PERL_IMPLICIT_CONTEXT provides a way to bundle up everything +that the interpreter knows about itself and pass it around, so too are +there plans to allow the interpreter to bundle up everything it knows +about the environment it's running on. This is enabled with the +PERL_IMPLICIT_SYS macro. Currently it only works with PERL_OBJECT, +but is mostly there for MULTIPLICITY and USE_THREADS (see inside +iperlsys.h). -=item strEQ - -Test two strings to see if they are equal. Returns true or false. - - int strEQ( char *s1, char *s2 ) - -=item strGE - -Test two strings to see if the first, C<s1>, is greater than or equal to the -second, C<s2>. Returns true or false. - - int strGE( char *s1, char *s2 ) - -=item strGT - -Test two strings to see if the first, C<s1>, is greater than the second, -C<s2>. 
Returns true or false. - - int strGT( char *s1, char *s2 ) - -=item strLE - -Test two strings to see if the first, C<s1>, is less than or equal to the -second, C<s2>. Returns true or false. - - int strLE( char *s1, char *s2 ) - -=item strLT - -Test two strings to see if the first, C<s1>, is less than the second, -C<s2>. Returns true or false. - - int strLT( char *s1, char *s2 ) - -=item strNE - -Test two strings to see if they are different. Returns true or false. - - int strNE( char *s1, char *s2 ) - -=item strnEQ - -Test two strings to see if they are equal. The C<len> parameter indicates -the number of bytes to compare. Returns true or false. - - int strnEQ( char *s1, char *s2 ) - -=item strnNE - -Test two strings to see if they are different. The C<len> parameter -indicates the number of bytes to compare. Returns true or false. - - int strnNE( char *s1, char *s2, int len ) - -=item sv_2mortal - -Marks an SV as mortal. The SV will be destroyed when the current context -ends. - - SV* sv_2mortal (SV* sv) - -=item sv_bless - -Blesses an SV into a specified package. The SV must be an RV. The package -must be designated by its stash (see C<gv_stashpv()>). The reference count -of the SV is unaffected. - - SV* sv_bless (SV* sv, HV* stash) - -=item sv_catpv - -Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. - - void sv_catpv (SV* sv, char* ptr) - -=item sv_catpv_mg - -Like C<sv_catpv>, but also handles 'set' magic. - - void sv_catpv_mg (SV* sv, const char* ptr) - -=item sv_catpvn - -Concatenates the string onto the end of the string which is in the SV. The -C<len> indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C<sv_catpvn_mg>. - - void sv_catpvn (SV* sv, char* ptr, STRLEN len) - -=item sv_catpvn_mg - -Like C<sv_catpvn>, but also handles 'set' magic. 
- - void sv_catpvn_mg (SV* sv, char* ptr, STRLEN len) - -=item sv_catpvf - -Processes its arguments like C<sprintf> and appends the formatted output -to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must -typically be called after calling this function to handle 'set' magic. - - void sv_catpvf (SV* sv, const char* pat, ...) - -=item sv_catpvf_mg - -Like C<sv_catpvf>, but also handles 'set' magic. - - void sv_catpvf_mg (SV* sv, const char* pat, ...) - -=item sv_catsv - -Concatenates the string from SV C<ssv> onto the end of the string in SV -C<dsv>. Handles 'get' magic, but not 'set' magic. See C<sv_catsv_mg>. - - void sv_catsv (SV* dsv, SV* ssv) - -=item sv_catsv_mg - -Like C<sv_catsv>, but also handles 'set' magic. - - void sv_catsv_mg (SV* dsv, SV* ssv) - -=item sv_chop - -Efficient removal of characters from the beginning of the string -buffer. SvPOK(sv) must be true and the C<ptr> must be a pointer to -somewhere inside the string buffer. The C<ptr> becomes the first -character of the adjusted string. - - void sv_chop(SV* sv, char *ptr) - - -=item sv_cmp - -Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the -string in C<sv1> is less than, equal to, or greater than the string in -C<sv2>. - - I32 sv_cmp (SV* sv1, SV* sv2) - -=item SvCUR - -Returns the length of the string which is in the SV. See C<SvLEN>. - - int SvCUR (SV* sv) - -=item SvCUR_set - -Set the length of the string which is in the SV. See C<SvCUR>. - - void SvCUR_set (SV* sv, int val) - -=item sv_dec - -Auto-decrement of the value in the SV. - - void sv_dec (SV* sv) - -=item sv_derived_from - -Returns a boolean indicating whether the SV is derived from the specified -class. This is the function that implements C<UNIVERSAL::isa>. It works -for class names as well as for objects. - - bool sv_derived_from _((SV* sv, char* name)); - -=item SvEND - -Returns a pointer to the last character in the string which is in the SV. -See C<SvCUR>. 
Access the character as - - char* SvEND(sv) - -=item sv_eq - -Returns a boolean indicating whether the strings in the two SVs are -identical. - - I32 sv_eq (SV* sv1, SV* sv2) - -=item SvGETMAGIC - -Invokes C<mg_get> on an SV if it has 'get' magic. This macro evaluates -its argument more than once. - - void SvGETMAGIC(SV *sv) - -=item SvGROW - -Expands the character buffer in the SV so that it has room for the -indicated number of bytes (remember to reserve space for an extra -trailing NUL character). Calls C<sv_grow> to perform the expansion if -necessary. Returns a pointer to the character buffer. - - char* SvGROW(SV* sv, STRLEN len) - -=item sv_grow - -Expands the character buffer in the SV. This will use C<sv_unref> and will -upgrade the SV to C<SVt_PV>. Returns a pointer to the character buffer. -Use C<SvGROW>. - -=item sv_inc - -Auto-increment of the value in the SV. - - void sv_inc (SV* sv) - -=item sv_insert - -Inserts a string at the specified offset/length within the SV. -Similar to the Perl substr() function. - - void sv_insert(SV *sv, STRLEN offset, STRLEN len, - char *str, STRLEN strlen) - -=item SvIOK - -Returns a boolean indicating whether the SV contains an integer. - - int SvIOK (SV* SV) - -=item SvIOK_off - -Unsets the IV status of an SV. - - void SvIOK_off (SV* sv) - -=item SvIOK_on - -Tells an SV that it is an integer. - - void SvIOK_on (SV* sv) - -=item SvIOK_only - -Tells an SV that it is an integer and disables all other OK bits. - - void SvIOK_only (SV* sv) - -=item SvIOKp - -Returns a boolean indicating whether the SV contains an integer. Checks the -B<private> setting. Use C<SvIOK>. - - int SvIOKp (SV* SV) - -=item sv_isa - -Returns a boolean indicating whether the SV is blessed into the specified -class. This does not check for subtypes; use C<sv_derived_from> to verify -an inheritance relationship. 
- - int sv_isa (SV* sv, char* name) - -=item sv_isobject - -Returns a boolean indicating whether the SV is an RV pointing to a blessed -object. If the SV is not an RV, or if the object is not blessed, then this -will return false. - - int sv_isobject (SV* sv) - -=item SvIV - -Coerces the given SV to an integer and returns it. - - int SvIV (SV* sv) - -=item SvIVX - -Returns the integer which is stored in the SV, assuming SvIOK is true. - - int SvIVX (SV* sv) - -=item SvLEN - -Returns the size of the string buffer in the SV. See C<SvCUR>. - - int SvLEN (SV* sv) - -=item sv_len - -Returns the length of the string in the SV. Use C<SvCUR>. - - STRLEN sv_len (SV* sv) - -=item sv_magic - -Adds magic to an SV. - - void sv_magic (SV* sv, SV* obj, int how, char* name, I32 namlen) - -=item sv_mortalcopy - -Creates a new SV which is a copy of the original SV. The new SV is marked -as mortal. - - SV* sv_mortalcopy (SV* oldsv) - -=item sv_newmortal - -Creates a new SV which is mortal. The reference count of the SV is set to 1. - - SV* sv_newmortal (void) - -=item SvNIOK - -Returns a boolean indicating whether the SV contains a number, integer or -double. - - int SvNIOK (SV* SV) - -=item SvNIOK_off - -Unsets the NV/IV status of an SV. - - void SvNIOK_off (SV* sv) - -=item SvNIOKp - -Returns a boolean indicating whether the SV contains a number, integer or -double. Checks the B<private> setting. Use C<SvNIOK>. - - int SvNIOKp (SV* SV) - -=item PL_sv_no - -This is the C<false> SV. See C<PL_sv_yes>. Always refer to this as C<&PL_sv_no>. - -=item SvNOK - -Returns a boolean indicating whether the SV contains a double. - - int SvNOK (SV* SV) - -=item SvNOK_off - -Unsets the NV status of an SV. - - void SvNOK_off (SV* sv) - -=item SvNOK_on - -Tells an SV that it is a double. - - void SvNOK_on (SV* sv) - -=item SvNOK_only - -Tells an SV that it is a double and disables all other OK bits. 
- - void SvNOK_only (SV* sv) - -=item SvNOKp - -Returns a boolean indicating whether the SV contains a double. Checks the -B<private> setting. Use C<SvNOK>. - - int SvNOKp (SV* SV) - -=item SvNV - -Coerce the given SV to a double and return it. - - double SvNV (SV* sv) - -=item SvNVX - -Returns the double which is stored in the SV, assuming SvNOK is true. - - double SvNVX (SV* sv) - -=item SvOK - -Returns a boolean indicating whether the value is an SV. - - int SvOK (SV* sv) - -=item SvOOK - -Returns a boolean indicating whether the SvIVX is a valid offset value -for the SvPVX. This hack is used internally to speed up removal of -characters from the beginning of a SvPV. When SvOOK is true, then the -start of the allocated string buffer is really (SvPVX - SvIVX). - - int SvOOK(SV* sv) - -=item SvPOK - -Returns a boolean indicating whether the SV contains a character string. - - int SvPOK (SV* SV) - -=item SvPOK_off - -Unsets the PV status of an SV. - - void SvPOK_off (SV* sv) - -=item SvPOK_on - -Tells an SV that it is a string. - - void SvPOK_on (SV* sv) - -=item SvPOK_only - -Tells an SV that it is a string and disables all other OK bits. - - void SvPOK_only (SV* sv) - -=item SvPOKp - -Returns a boolean indicating whether the SV contains a character string. -Checks the B<private> setting. Use C<SvPOK>. - - int SvPOKp (SV* SV) - -=item SvPV - -Returns a pointer to the string in the SV, or a stringified form of the SV -if the SV does not contain a string. Handles 'get' magic. - - char* SvPV (SV* sv, STRLEN len) - -=item SvPV_force - -Like <SvPV> but will force the SV into becoming a string (SvPOK). You -want force if you are going to update the SvPVX directly. - - char* SvPV_force(SV* sv, STRLEN len) - -=item SvPVX - -Returns a pointer to the string in the SV. The SV must contain a string. - - char* SvPVX (SV* sv) - -=item SvREFCNT - -Returns the value of the object's reference count. 
- - int SvREFCNT (SV* sv) - -=item SvREFCNT_dec - -Decrements the reference count of the given SV. - - void SvREFCNT_dec (SV* sv) - -=item SvREFCNT_inc - -Increments the reference count of the given SV. - - void SvREFCNT_inc (SV* sv) - -=item SvROK - -Tests if the SV is an RV. - - int SvROK (SV* sv) - -=item SvROK_off - -Unsets the RV status of an SV. - - void SvROK_off (SV* sv) - -=item SvROK_on - -Tells an SV that it is an RV. - - void SvROK_on (SV* sv) - -=item SvRV - -Dereferences an RV to return the SV. - - SV* SvRV (SV* sv) - -=item SvSETMAGIC - -Invokes C<mg_set> on an SV if it has 'set' magic. This macro evaluates -its argument more than once. - - void SvSETMAGIC( SV *sv ) - -=item sv_setiv - -Copies an integer into the given SV. Does not handle 'set' magic. -See C<sv_setiv_mg>. - - void sv_setiv (SV* sv, IV num) - -=item sv_setiv_mg - -Like C<sv_setiv>, but also handles 'set' magic. - - void sv_setiv_mg (SV* sv, IV num) - -=item sv_setnv - -Copies a double into the given SV. Does not handle 'set' magic. -See C<sv_setnv_mg>. - - void sv_setnv (SV* sv, double num) - -=item sv_setnv_mg - -Like C<sv_setnv>, but also handles 'set' magic. - - void sv_setnv_mg (SV* sv, double num) - -=item sv_setpv - -Copies a string into an SV. The string must be null-terminated. -Does not handle 'set' magic. See C<sv_setpv_mg>. - - void sv_setpv (SV* sv, const char* ptr) - -=item sv_setpv_mg - -Like C<sv_setpv>, but also handles 'set' magic. - - void sv_setpv_mg (SV* sv, const char* ptr) - -=item sv_setpviv - -Copies an integer into the given SV, also updating its string value. -Does not handle 'set' magic. See C<sv_setpviv_mg>. - - void sv_setpviv (SV* sv, IV num) - -=item sv_setpviv_mg - -Like C<sv_setpviv>, but also handles 'set' magic. - - void sv_setpviv_mg (SV* sv, IV num) - -=item sv_setpvn - -Copies a string into an SV. The C<len> parameter indicates the number of -bytes to be copied. Does not handle 'set' magic. See C<sv_setpvn_mg>. 
- - void sv_setpvn (SV* sv, const char* ptr, STRLEN len) - -=item sv_setpvn_mg - -Like C<sv_setpvn>, but also handles 'set' magic. - - void sv_setpvn_mg (SV* sv, const char* ptr, STRLEN len) - -=item sv_setpvf - -Processes its arguments like C<sprintf> and sets an SV to the formatted -output. Does not handle 'set' magic. See C<sv_setpvf_mg>. - - void sv_setpvf (SV* sv, const char* pat, ...) - -=item sv_setpvf_mg - -Like C<sv_setpvf>, but also handles 'set' magic. - - void sv_setpvf_mg (SV* sv, const char* pat, ...) - -=item sv_setref_iv - -Copies an integer into a new SV, optionally blessing the SV. The C<rv> -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. The C<classname> argument indicates the package for the -blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV -will be returned and will have a reference count of 1. - - SV* sv_setref_iv (SV *rv, char *classname, IV iv) - -=item sv_setref_nv - -Copies a double into a new SV, optionally blessing the SV. The C<rv> -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. The C<classname> argument indicates the package for the -blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV -will be returned and will have a reference count of 1. - - SV* sv_setref_nv (SV *rv, char *classname, double nv) - -=item sv_setref_pv - -Copies a pointer into a new SV, optionally blessing the SV. The C<rv> -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. If the C<pv> argument is NULL then C<PL_sv_undef> will be placed -into the SV. The C<classname> argument indicates the package for the -blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV -will be returned and will have a reference count of 1. 
- - SV* sv_setref_pv (SV *rv, char *classname, void* pv) - -Do not use with integral Perl types such as HV, AV, SV, CV, because those -objects will become corrupted by the pointer copy process. - -Note that C<sv_setref_pvn> copies the string while this copies the pointer. - -=item sv_setref_pvn - -Copies a string into a new SV, optionally blessing the SV. The length of the -string must be specified with C<n>. The C<rv> argument will be upgraded to -an RV. That RV will be modified to point to the new SV. The C<classname> -argument indicates the package for the blessing. Set C<classname> to -C<Nullch> to avoid the blessing. The new SV will be returned and will have -a reference count of 1. - - SV* sv_setref_pvn (SV *rv, char *classname, char* pv, I32 n) - -Note that C<sv_setref_pv> copies the pointer while this copies the string. - -=item SvSetSV - -Calls C<sv_setsv> if dsv is not the same as ssv. May evaluate arguments -more than once. - - void SvSetSV (SV* dsv, SV* ssv) - -=item SvSetSV_nosteal - -Calls a non-destructive version of C<sv_setsv> if dsv is not the same as ssv. -May evaluate arguments more than once. - - void SvSetSV_nosteal (SV* dsv, SV* ssv) - -=item sv_setsv - -Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. -The source SV may be destroyed if it is mortal. Does not handle 'set' magic. -See the macro forms C<SvSetSV>, C<SvSetSV_nosteal> and C<sv_setsv_mg>. - - void sv_setsv (SV* dsv, SV* ssv) - -=item sv_setsv_mg - -Like C<sv_setsv>, but also handles 'set' magic. - - void sv_setsv_mg (SV* dsv, SV* ssv) - -=item sv_setuv - -Copies an unsigned integer into the given SV. Does not handle 'set' magic. -See C<sv_setuv_mg>. - - void sv_setuv (SV* sv, UV num) - -=item sv_setuv_mg - -Like C<sv_setuv>, but also handles 'set' magic. - - void sv_setuv_mg (SV* sv, UV num) - -=item SvSTASH - -Returns the stash of the SV. 
- - HV* SvSTASH (SV* sv) - -=item SvTAINT - -Taints an SV if tainting is enabled - - void SvTAINT (SV* sv) - -=item SvTAINTED - -Checks to see if an SV is tainted. Returns TRUE if it is, FALSE if not. - - int SvTAINTED (SV* sv) - -=item SvTAINTED_off - -Untaints an SV. Be I<very> careful with this routine, as it short-circuits -some of Perl's fundamental security features. XS module authors should -not use this function unless they fully understand all the implications -of unconditionally untainting the value. Untainting should be done in -the standard perl fashion, via a carefully crafted regexp, rather than -directly untainting variables. - - void SvTAINTED_off (SV* sv) - -=item SvTAINTED_on - -Marks an SV as tainted. - - void SvTAINTED_on (SV* sv) - -=item SVt_IV - -Integer type flag for scalars. See C<svtype>. - -=item SVt_PV - -Pointer type flag for scalars. See C<svtype>. - -=item SVt_PVAV - -Type flag for arrays. See C<svtype>. - -=item SVt_PVCV - -Type flag for code refs. See C<svtype>. - -=item SVt_PVHV - -Type flag for hashes. See C<svtype>. - -=item SVt_PVMG - -Type flag for blessed scalars. See C<svtype>. - -=item SVt_NV - -Double type flag for scalars. See C<svtype>. - -=item SvTRUE - -Returns a boolean indicating whether Perl would evaluate the SV as true or -false, defined or undefined. Does not handle 'get' magic. - - int SvTRUE (SV* sv) - -=item SvTYPE - -Returns the type of the SV. See C<svtype>. - - svtype SvTYPE (SV* sv) - -=item svtype - -An enum of flags for Perl types. These are found in the file B<sv.h> in the -C<svtype> enum. Test these flags with the C<SvTYPE> macro. - -=item PL_sv_undef - -This is the C<undef> SV. Always refer to this as C<&PL_sv_undef>. - -=item sv_unref - -Unsets the RV status of the SV, and decrements the reference count of -whatever was being referenced by the RV. This can almost be thought of -as a reversal of C<newSVrv>. See C<SvROK_off>. 
- - void sv_unref (SV* sv) - -=item SvUPGRADE - -Used to upgrade an SV to a more complex form. Uses C<sv_upgrade> to perform -the upgrade if necessary. See C<svtype>. - - bool SvUPGRADE (SV* sv, svtype mt) - -=item sv_upgrade - -Upgrade an SV to a more complex form. Use C<SvUPGRADE>. See C<svtype>. - -=item sv_usepvn - -Tells an SV to use C<ptr> to find its string value. Normally the string is -stored inside the SV but sv_usepvn allows the SV to use an outside string. -The C<ptr> should point to memory that was allocated by C<malloc>. The -string length, C<len>, must be supplied. This function will realloc the -memory pointed to by C<ptr>, so that pointer should not be freed or used by -the programmer after giving it to sv_usepvn. Does not handle 'set' magic. -See C<sv_usepvn_mg>. - - void sv_usepvn (SV* sv, char* ptr, STRLEN len) - -=item sv_usepvn_mg - -Like C<sv_usepvn>, but also handles 'set' magic. - - void sv_usepvn_mg (SV* sv, char* ptr, STRLEN len) - -=item sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, used_locale) - -Processes its arguments like C<vsprintf> and appends the formatted output -to an SV. Uses an array of SVs if the C style variable argument list is -missing (NULL). Indicates if locale information has been used for formatting. - - void sv_catpvfn _((SV* sv, const char* pat, STRLEN patlen, - va_list *args, SV **svargs, I32 svmax, - bool *used_locale)); - -=item sv_vsetpvfn(sv, pat, patlen, args, svargs, svmax, used_locale) - -Works like C<vcatpvfn> but copies the text into the SV instead of -appending it. - - void sv_setpvfn _((SV* sv, const char* pat, STRLEN patlen, - va_list *args, SV **svargs, I32 svmax, - bool *used_locale)); - -=item SvUV - -Coerces the given SV to an unsigned integer and returns it. - - UV SvUV(SV* sv) - -=item SvUVX - -Returns the unsigned integer which is stored in the SV, assuming SvIOK is true. - - UV SvUVX(SV* sv) - -=item PL_sv_yes - -This is the C<true> SV. See C<PL_sv_no>. Always refer to this as C<&PL_sv_yes>. 
- -=item THIS - -Variable which is setup by C<xsubpp> to designate the object in a C++ XSUB. -This is always the proper type for the C++ object. See C<CLASS> and -L<perlxs/"Using XS With C++">. - -=item toLOWER - -Converts the specified character to lowercase. - - int toLOWER (char c) - -=item toUPPER - -Converts the specified character to uppercase. - - int toUPPER (char c) - -=item warn - -This is the XSUB-writer's interface to Perl's C<warn> function. Use this -function the same way you use the C C<printf> function. See C<croak()>. - -=item XPUSHi - -Push an integer onto the stack, extending the stack if necessary. Handles -'set' magic. See C<PUSHi>. - - XPUSHi(int d) - -=item XPUSHn - -Push a double onto the stack, extending the stack if necessary. Handles 'set' -magic. See C<PUSHn>. - - XPUSHn(double d) - -=item XPUSHp - -Push a string onto the stack, extending the stack if necessary. The C<len> -indicates the length of the string. Handles 'set' magic. See C<PUSHp>. - - XPUSHp(char *c, int len) - -=item XPUSHs - -Push an SV onto the stack, extending the stack if necessary. Does not -handle 'set' magic. See C<PUSHs>. - - XPUSHs(sv) - -=item XPUSHu - -Push an unsigned integer onto the stack, extending the stack if -necessary. See C<PUSHu>. - -=item XS - -Macro to declare an XSUB and its C parameter list. This is handled by -C<xsubpp>. - -=item XSRETURN - -Return from XSUB, indicating number of items on the stack. This is usually -handled by C<xsubpp>. - - XSRETURN(int x) - -=item XSRETURN_EMPTY - -Return an empty list from an XSUB immediately. - - XSRETURN_EMPTY; - -=item XSRETURN_IV - -Return an integer from an XSUB immediately. Uses C<XST_mIV>. - - XSRETURN_IV(IV v) - -=item XSRETURN_NO - -Return C<&PL_sv_no> from an XSUB immediately. Uses C<XST_mNO>. - - XSRETURN_NO; - -=item XSRETURN_NV - -Return an double from an XSUB immediately. Uses C<XST_mNV>. - - XSRETURN_NV(NV v) - -=item XSRETURN_PV - -Return a copy of a string from an XSUB immediately. 
Uses C<XST_mPV>. - - XSRETURN_PV(char *v) - -=item XSRETURN_UNDEF - -Return C<&PL_sv_undef> from an XSUB immediately. Uses C<XST_mUNDEF>. - - XSRETURN_UNDEF; - -=item XSRETURN_YES - -Return C<&PL_sv_yes> from an XSUB immediately. Uses C<XST_mYES>. - - XSRETURN_YES; - -=item XST_mIV - -Place an integer into the specified position C<i> on the stack. The value is -stored in a new mortal SV. - - XST_mIV( int i, IV v ) - -=item XST_mNV - -Place a double into the specified position C<i> on the stack. The value is -stored in a new mortal SV. - - XST_mNV( int i, NV v ) - -=item XST_mNO - -Place C<&PL_sv_no> into the specified position C<i> on the stack. - - XST_mNO( int i ) - -=item XST_mPV - -Place a copy of a string into the specified position C<i> on the stack. The -value is stored in a new mortal SV. - - XST_mPV( int i, char *v ) - -=item XST_mUNDEF - -Place C<&PL_sv_undef> into the specified position C<i> on the stack. - - XST_mUNDEF( int i ) - -=item XST_mYES - -Place C<&PL_sv_yes> into the specified position C<i> on the stack. - - XST_mYES( int i ) - -=item XS_VERSION - -The version identifier for an XS module. This is usually handled -automatically by C<ExtUtils::MakeMaker>. See C<XS_VERSION_BOOTCHECK>. - -=item XS_VERSION_BOOTCHECK - -Macro to verify that a PM module's $VERSION variable matches the XS module's -C<XS_VERSION> variable. This is usually handled automatically by -C<xsubpp>. See L<perlxs/"The VERSIONCHECK: Keyword">. - -=item Zero - -The XSUB-writer's interface to the C C<memzero> function. The C<d> is the -destination, C<n> is the number of items, and C<t> is the type. - - void Zero( d, n, t ) - -=back +This allows the ability to provide an extra pointer (called the "host" +environment) for all the system calls. This makes it possible for +all the system stuff to maintain their own state, broken down into +seven C structures. 
These are thin wrappers around the usual system +calls (see win32/perllib.c) for the default perl executable, but for a +more ambitious host (like the one that would do fork() emulation) all +the extra work needed to pretend that different interpreters are +actually different "processes", would be done here. + +The Perl engine/interpreter and the host are orthogonal entities. +There could be one or more interpreters in a process, and one or +more "hosts", with free association between them. =head1 AUTHORS Until May 1997, this document was maintained by Jeff Okamoto -<okamoto@corp.hp.com>. It is now maintained as part of Perl itself. +<okamoto@corp.hp.com>. It is now maintained as part of Perl itself +by the Perl 5 Porters <perl5-porters@perl.org>. With lots of help and suggestions from Dean Roehrich, Malcolm Beattie, Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil @@ -3594,3 +1794,10 @@ Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer, Stephen McCamant, and Gurusamy Sarathy. API Listing originally by Dean Roehrich <roehrich@cray.com>. + +Modifications to autogenerate the API listing (L<perlapi>) by Benjamin +Stuhl. + +=head1 SEE ALSO + +perlapi(1), perlintern(1), perlxs(1), perlembed(1) diff --git a/contrib/perl5/pod/perlhack.pod b/contrib/perl5/pod/perlhack.pod new file mode 100644 index 0000000..c640870 --- /dev/null +++ b/contrib/perl5/pod/perlhack.pod @@ -0,0 +1,292 @@ +=head1 NAME + +perlhack - How to hack at the Perl internals + +=head1 DESCRIPTION + +This document attempts to explain how Perl development takes place, +and ends with some suggestions for people wanting to become bona fide +porters. + +The perl5-porters mailing list is where the Perl standard distribution +is maintained and developed. The list can get anywhere from 10 to 150 +messages a day, depending on the heatedness of the debate. Most days +there are two or three patches, extensions, features, or bugs being +discussed at a time. 
+ +A searchable archive of the list is at: + + http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/ + +The list is also archived under the usenet group name +C<perl.porters-gw> at: + + http://www.deja.com/ + +List subscribers (the porters themselves) come in several flavours. +Some are quiet curious lurkers, who rarely pitch in and instead watch +the ongoing development to ensure they're forewarned of new changes or +features in Perl. Some are representatives of vendors, who are there +to make sure that Perl continues to compile and work on their +platforms. Some patch any reported bug that they know how to fix, +some are actively patching their pet area (threads, Win32, the regexp +engine), while others seem to do nothing but complain. In other +words, it's your usual mix of technical people. + +Over this group of porters presides Larry Wall. He has the final word +in what does and does not change in the Perl language. Various +releases of Perl are shepherded by a ``pumpking'', a porter +responsible for gathering patches, deciding on a patch-by-patch +feature-by-feature basis what will and will not go into the release. +For instance, Gurusamy Sarathy is the pumpking for the 5.6 release of +Perl. + +In addition, various people are pumpkings for different things. For +instance, Andy Dougherty and Jarkko Hietaniemi share the I<Configure> +pumpkin, and Tom Christiansen is the documentation pumpking. + +Larry sees Perl development along the lines of the US government: +there's the Legislature (the porters), the Executive branch (the +pumpkings), and the Supreme Court (Larry). The legislature can +discuss and submit patches to the executive branch all they like, but +the executive branch is free to veto them. Rarely, the Supreme Court +will side with the executive branch over the legislature, or the +legislature over the executive branch. 
Mostly, however, the +legislature and the executive branch are supposed to get along and +work out their differences without impeachment or court cases. + +You might sometimes see reference to Rule 1 and Rule 2. Larry's power +as Supreme Court is expressed in The Rules: + +=over 4 + +=item 1 + +Larry is always by definition right about how Perl should behave. +This means he has final veto power on the core functionality. + +=item 2 + +Larry is allowed to change his mind about any matter at a later date, +regardless of whether he previously invoked Rule 1. + +=back + +Got that? Larry is always right, even when he was wrong. It's rare +to see either Rule exercised, but they are often alluded to. + +New features and extensions to the language are contentious, because +the criteria used by the pumpkings, Larry, and other porters to decide +which features should be implemented and incorporated are not codified +in a few small design goals as with some other languages. Instead, +the heuristics are flexible and often difficult to fathom. Here is +one person's list, roughly in decreasing order of importance, of +heuristics that new features have to be weighed against: + +=over 4 + +=item Does concept match the general goals of Perl? + +These haven't been written anywhere in stone, but one approximation +is: + + 1. Keep it fast, simple, and useful. + 2. Keep features/concepts as orthogonal as possible. + 3. No arbitrary limits (platforms, data sizes, cultures). + 4. Keep it open and exciting to use/patch/advocate Perl everywhere. + 5. Either assimilate new technologies, or build bridges to them. + +=item Where is the implementation? + +All the talk in the world is useless without an implementation. In +almost every case, the person or people who argue for a new feature +will be expected to be the ones who implement it. Porters capable +of coding new features have their own agendas, and are not available +to implement your (possibly good) idea. 
+ +=item Backwards compatibility + +It's a cardinal sin to break existing Perl programs. New warnings are +contentious--some say that a program that emits warnings is not +broken, while others say it is. Adding keywords has the potential to +break programs, changing the meaning of existing token sequences or +functions might break programs. + +=item Could it be a module instead? + +Perl 5 has extension mechanisms, modules and XS, specifically to avoid +the need to keep changing the Perl interpreter. You can write modules +that export functions, you can give those functions prototypes so they +can be called like built-in functions, you can even write XS code to +mess with the runtime data structures of the Perl interpreter if you +want to implement really complicated things. If it can be done in a +module instead of in the core, it's highly unlikely to be added. + +=item Is the feature generic enough? + +Is this something that only the submitter wants added to the language, +or would it be broadly useful? Sometimes, instead of adding a feature +with a tight focus, the porters might decide to wait until someone +implements the more generalized feature. For instance, instead of +implementing a ``delayed evaluation'' feature, the porters are waiting +for a macro system that would permit delayed evaluation and much more. + +=item Does it potentially introduce new bugs? + +Radical rewrites of large chunks of the Perl interpreter have the +potential to introduce new bugs. The smaller and more localized the +change, the better. + +=item Does it preclude other desirable features? + +A patch is likely to be rejected if it closes off future avenues of +development. For instance, a patch that placed a true and final +interpretation on prototypes is likely to be rejected because there +are still options for the future of prototypes that haven't been +addressed. + +=item Is the implementation robust? + +Good patches (tight code, complete, correct) stand more chance of +going in. 
Sloppy or incorrect patches might be placed on the back +burner until the pumpking has time to fix, or might be discarded +altogether without further notice. + +=item Is the implementation generic enough to be portable? + +The worst patches make use of system-specific features. It's highly +unlikely that nonportable additions to the Perl language will be +accepted. + +=item Is there enough documentation? + +Patches without documentation are probably ill-thought out or +incomplete. Nothing can be added without documentation, so submitting +a patch for the appropriate manpages as well as the source code is +always a good idea. If appropriate, patches should add to the test +suite as well. + +=item Is there another way to do it? + +Larry said ``Although the Perl Slogan is I<There's More Than One Way +to Do It>, I hesitate to make 10 ways to do something''. This is a +tricky heuristic to navigate, though--one man's essential addition is +another man's pointless cruft. + +=item Does it create too much work? + +Work for the pumpking, work for Perl programmers, work for module +authors, ... Perl is supposed to be easy. + +=item Patches speak louder than words + +Working code is always preferred to pie-in-the-sky ideas. A patch to +add a feature stands a much higher chance of making it to the language +than does a random feature request, no matter how fervently argued the +request might be. This ties into ``Will it be useful?'', as the fact +that someone took the time to make the patch demonstrates a strong +desire for the feature. + +=back + +If you're on the list, you might hear the word ``core'' bandied +around. It refers to the standard distribution. ``Hacking on the +core'' means you're changing the C source code to the Perl +interpreter. ``A core module'' is one that ships with Perl. 
+ +The source code to the Perl interpreter, in its different versions, is +kept in a repository managed by a revision control system (which is +currently the Perforce program, see http://perforce.com/). The +pumpkings and a few others have access to the repository to check in +changes. Periodically the pumpking for the development version of Perl +will release a new version, so the rest of the porters can see what's +changed. The current state of the main trunk of the repository, and patches +that describe the individual changes that have happened since the last +public release, are available at this location: + + ftp://ftp.linux.activestate.com/pub/staff/gsar/APC/ + +Selective parts are also visible via the rsync protocol. To get all +the individual changes to the mainline since the last development +release, use the following command: + + rsync -avuz rsync://ftp.linux.activestate.com/perl-diffs perl-diffs + +Use this to get the latest source tree in full: + + rsync -avuz rsync://ftp.linux.activestate.com/perl-current perl-current + +Needless to say, the source code in perl-current is usually in a perpetual +state of evolution. You should expect it to be very buggy. Do B<not> use +it for any purpose other than testing and development. + +Always submit patches to I<perl5-porters@perl.org>. This lets other +porters review your patch, which catches a surprising number of errors +in patches. Either use the diff program (available in source code +form from I<ftp://ftp.gnu.org/pub/gnu/>), or use Johan Vromans' +I<makepatch> (available from I<CPAN/authors/id/JV/>). Unified diffs +are preferred, but context diffs are accepted. Do not send RCS-style +diffs or diffs without context lines. More information is given in +the I<Porting/patching.pod> file in the Perl source distribution. +Please patch against the latest B<development> version (e.g., if +you're fixing a bug in the 5.005 track, patch against the latest +5.005_5x version). 
Only patches that survive the heat of the +development branch get applied to maintenance versions. + +Your patch should update the documentation and test suite. + +To report a bug in Perl, use the program I<perlbug> which comes with +Perl (if you can't get Perl to work, send mail to the address +I<perlbug@perl.com> or I<perlbug@perl.org>). Reporting bugs through +I<perlbug> feeds into the automated bug-tracking system, access to +which is provided through the web at I<http://bugs.perl.org/>. It +often pays to check the archives of the perl5-porters mailing list to +see whether the bug you're reporting has been reported before, and if +so whether it was considered a bug. See above for the location of +the searchable archives. + +The CPAN testers (I<http://testers.cpan.org/>) are a group of +volunteers who test CPAN modules on a variety of platforms. Perl Labs +(I<http://labs.perl.org/>) automatically tests Perl source releases on +platforms and gives feedback to the CPAN testers mailing list. Both +efforts welcome volunteers. + +To become an active and patching Perl porter, you'll need to learn how +Perl works on the inside. Chip Salzenberg, a pumpking, has written +articles on Perl internals for The Perl Journal +(I<http://www.tpj.com/>) which explain how various parts of the Perl +interpreter work. The C<perlguts> manpage explains the internal data +structures. And, of course, the C source code (sometimes sparsely +commented, sometimes commented well) is a great place to start (begin +with C<perl.c> and see where it goes from there). A lot of the style +of the Perl source is explained in the I<Porting/pumpkin.pod> file in +the source distribution. + +It is essential that you be comfortable using a good debugger +(e.g. gdb, dbx) before you can patch perl. Stepping through perl +as it executes a script is perhaps the best (if sometimes tedious) +way to gain a precise understanding of the overall architecture of +the language. 
+ +If you build a version of the Perl interpreter with C<-DDEBUGGING>, +Perl's B<-D> command line flag will cause copious debugging information +to be emitted (see the C<perlrun> manpage). If you build a version of +Perl with compiler debugging information (e.g. with the C compiler's +C<-g> option instead of C<-O>) then you can step through the execution +of the interpreter with your favourite C symbolic debugger, setting +breakpoints on particular functions. + +It's a good idea to read and lurk for a while before chipping in. +That way you'll get to see the dynamic of the conversations, learn the +personalities of the players, and hopefully be better prepared to make +a useful contribution when you do speak up. + +If after all this you still think you want to join the perl5-porters +mailing list, send mail to I<perl5-porters-subscribe@perl.org>. To +unsubscribe, send mail to I<perl5-porters-unsubscribe@perl.org>. + +=head1 AUTHOR + +This document was written by Nathan Torkington, and is maintained by +the perl5-porters mailing list. + diff --git a/contrib/perl5/pod/perlhist.pod b/contrib/perl5/pod/perlhist.pod index 5828ea4..4311ee2 100644 --- a/contrib/perl5/pod/perlhist.pod +++ b/contrib/perl5/pod/perlhist.pod @@ -4,10 +4,12 @@ perlhist - the Perl history records +=for RCS + =begin RCS # -# $Id: perlhist.pod,v 1.57 1999/01/26 17:38:07 jhi Exp $ +# $Id: perlhist.pod,v 1.2 2000/01/24 11:44:47 jhi Exp $ # =end RCS @@ -117,16 +119,16 @@ the strings?). 5.000alpha4 1993-???-?? 5.000alpha5 1993-???-?? 5.000alpha6 1994-Mar-18 - 5.003alpha7 1994-Mar-25 + 5.000alpha7 1994-Mar-25 Andy 5.000alpha8 1994-Apr-04 Larry 5.000alpha9 1994-May-05 ext appears. - 5.000alpha10 1994-???-?? - 5.000alpha11 1994-???-?? + 5.000alpha10 1994-Jun-11 + 5.000alpha11 1994-Jul-01 Andy 5.000a11a 1994-Jul-07 To fit 14. 5.000a11b 1994-Jul-14 5.000a11c 1994-Jul-19 5.000a11d 1994-Jul-22 - Larry 5.000alpha12 1994-???-?? 
+ Larry 5.000alpha12 1994-Aug-04 Andy 5.000a12a 1994-Aug-08 5.000a12b 1994-Aug-15 5.000a12c 1994-Aug-22 @@ -135,8 +137,8 @@ the strings?). 5.000a12f 1994-Aug-24 5.000a12g 1994-Aug-24 5.000a12h 1994-Aug-24 - Larry 5.000beta1 1994-???-?? - Andy 5.000b1a 1994-???-?? + Larry 5.000beta1 1994-Aug-30 + Andy 5.000b1a 1994-Sep-06 Larry 5.000beta2 1994-Sep-14 Core slushified. Andy 5.000b2a 1994-Sep-14 5.000b2b 1994-Sep-17 @@ -150,8 +152,9 @@ the strings?). 5.000b3f 1994-Sep-30 5.000b3g 1994-Oct-04 Andy 5.000b3h 1994-Oct-07 + Larry? 5.000gamma 1994-Oct-13? - Larry 5.000 1994-Oct-18 + Larry 5.000 1994-Oct-17 Andy 5.000a 1994-Dec-19 5.000b 1995-Jan-18 @@ -165,8 +168,9 @@ the strings?). 5.000j 1995-Feb-07 5.000k 1995-Feb-11 5.000l 1995-Feb-21 - 5.000m 1995-???-?? + 5.000m 1995-Feb-28 5.000n 1995-Mar-07 + 5.000o 1995-Mar-13? Larry 5.001 1995-Mar-13 @@ -185,7 +189,7 @@ the strings?). 5.001m 1995-Jul-02 Very stable. 5.001n 1995-Oct-31 Very unstable. 5.002beta1 1995-Nov-21 - 5.002b1a 1995-Nov-?? + 5.002b1a 1995-Dec-04 5.002b1b 1995-Dec-04 5.002b1c 1995-Dec-04 5.002b1d 1995-Dec-04 @@ -270,7 +274,8 @@ the strings?). 5.004_04-MT6 1998-Oct-09 5.004_04-MT7 1998-Nov-22 5.004_04-MT8 1998-Dec-03 - 5.004_04-MT9 1999-***-** + Chip 5.004_04-MT9 1999-Apr-26 + 5.004_05 1999-Apr-29 Malcolm 5.004_50 1997-Sep-09 The 5.005 development track. 5.004_51 1997-Oct-02 @@ -310,16 +315,31 @@ the strings?). 5.005_03-MT3 1999-Jan-17 5.005_03-MT4 1999-Jan-26 5.005_03-MT5 1999-Jan-28 - 5.005_03-MT6 1999-Mar-04 5.005_03 1999-Mar-28 + Chip 5.005_04 2000-***-** - Sarathy 5.005_50 1998-Jul-26 The 5.006 development track. + Sarathy 5.005_50 1998-Jul-26 The 5.6 development track. 
5.005_51 1998-Aug-10 5.005_52 1998-Sep-25 5.005_53 1998-Oct-31 5.005_54 1998-Nov-30 5.005_55 1999-Feb-16 5.005_56 1999-Mar-01 + 5.005_57 1999-May-25 + 5.005_58 1999-Jul-27 + 5.005_59 1999-Aug-02 + 5.005_60 1999-Aug-02 + 5.005_61 1999-Aug-20 + 5.005_62 1999-Oct-15 + 5.005_63 1999-Dec-09 + 5.5.640 2000-Feb-02 + 5.5.650 2000-Feb-08 beta1 + 5.5.660 2000-Feb-22 beta2 + 5.5.670 2000-Feb-29 beta3 + 5.6.0-RC1 2000-Mar-09 release candidate 1 + 5.6.0-RC2 2000-Mar-14 release candidate 2 + 5.6.0-RC3 2000-Mar-21 release candidate 3 + 5.6.0 2000-Mar-22 =head2 SELECTED RELEASE SIZES @@ -352,6 +372,7 @@ explained below. 5.004 1351 60 1230 136 408 51 355 161 1587 55 5.004_01 1356 60 1258 138 410 51 358 161 1587 55 5.004_04 1375 60 1294 139 413 51 394 162 1629 55 + 5.004_05 1463 60 1435 150 394 50 445 175 1855 59 5.004_51 1401 61 1260 140 413 53 358 162 1594 56 5.004_53 1422 62 1295 141 438 70 394 162 1637 56 5.004_56 1501 66 1301 140 447 74 408 165 1648 57 @@ -363,6 +384,10 @@ explained below. 5.004_73 1874 76 1467 152 762 102 506 196 1883 61 5.004_75 1877 76 1467 152 770 103 508 196 1896 62 5.005 1896 76 1469 152 795 103 509 197 1945 63 + 5.005_03 1936 77 1541 153 813 104 551 201 2176 72 + 5.005_50 1969 78 1842 301 795 103 514 198 1948 63 + 5.005_53 1999 79 1885 303 806 104 602 224 2002 67 + 5.005_56 2086 79 1970 307 866 113 672 238 2221 75 The "core"..."doc" mean the following files from the Perl source code distribution. The glob notation ** means recursively, (.) means @@ -439,25 +464,28 @@ the Perl source distribution for somewhat more selected releases. 
====================================================================== - 5.004_70 5.004_73 5.004_75 5.005 - - beos 1 1 1 1 1 1 1 1 - Configure 256 1 256 1 264 1 264 1 - cygwin32 24 5 24 5 24 5 24 5 - djgpp 14 5 14 5 14 5 14 5 - eg 86 65 86 65 86 65 86 65 - emacs 262 2 262 2 262 2 262 2 - h2pl 12 12 12 12 12 12 12 12 - hints 157 74 157 74 159 74 160 74 - mpeix - - - - 5 3 5 3 - os2 129 44 139 44 142 44 143 44 - plan9 82 15 82 15 82 15 82 15 - Porting 241 9 253 9 259 10 264 12 - qnx 1 2 1 2 1 2 1 2 - utils 160 9 160 9 160 9 160 9 - vms 570 34 572 34 573 34 575 34 - win32 577 41 585 41 585 41 587 41 - x2p 281 19 281 19 281 19 281 19 + 5.004_70 5.004_73 5.004_75 5.005 5.005_03 + + apollo - - - - - - - - 0 1 + beos 1 1 1 1 1 1 1 1 1 1 + Configure 256 1 256 1 264 1 264 1 270 1 + cygwin32 24 5 24 5 24 5 24 5 24 5 + djgpp 14 5 14 5 14 5 14 5 15 5 + eg 86 65 86 65 86 65 86 65 86 65 + emacs 262 2 262 2 262 2 262 2 274 2 + h2pl 12 12 12 12 12 12 12 12 12 12 + hints 157 74 157 74 159 74 160 74 179 77 + mint - - - - - - - - 4 7 + mpeix - - - - 5 3 5 3 5 3 + os2 129 44 139 44 142 44 143 44 148 44 + plan9 82 15 82 15 82 15 82 15 82 15 + Porting 241 9 253 9 259 10 264 12 272 13 + qnx 1 2 1 2 1 2 1 2 1 2 + utils 160 9 160 9 160 9 160 9 164 9 + vms 570 34 572 34 573 34 575 34 583 34 + vos - - - - - - - - 156 10 + win32 577 41 585 41 585 41 587 41 600 42 + x2p 281 19 281 19 281 19 281 19 281 19 =head2 SELECTED PATCH SIZES @@ -531,7 +559,7 @@ Jarkko Hietaniemi <F<jhi@iki.fi>>. Thanks to the collective memory of the Perlfolk. In addition to the Keepers of the Pumpkin also Alan Champion, Andreas König, John -Macdonald, Matthias Neeracher, Michael Peppler, Randal Schwartz, and -Paul D. Smith sent corrections and additions. +Macdonald, Matthias Neeracher, Jeff Okamoto, Michael Peppler, +Randal Schwartz, and Paul D. Smith sent corrections and additions. 
=cut diff --git a/contrib/perl5/pod/perlintern.pod b/contrib/perl5/pod/perlintern.pod new file mode 100644 index 0000000..58eeac6 --- /dev/null +++ b/contrib/perl5/pod/perlintern.pod @@ -0,0 +1,26 @@ +=head1 NAME + +perlintern - autogenerated documentation of purely B<internal> + Perl functions + +=head1 DESCRIPTION + +This file is the autogenerated documentation of functions in the +Perl interpreter that are documented using Perl's internal documentation +format but are not marked as part of the Perl API. In other words, +B<they are not for use in extensions>! + +=over 8 + +=back + +=head1 AUTHORS + +The autodocumentation system was originally added to the Perl core by +Benjamin Stuhl. Documentation is by whoever was kind enough to +document their functions. + +=head1 SEE ALSO + +perlguts(1), perlapi(1) + diff --git a/contrib/perl5/pod/perlipc.pod b/contrib/perl5/pod/perlipc.pod index 2f99d10..8760257 100644 --- a/contrib/perl5/pod/perlipc.pod +++ b/contrib/perl5/pod/perlipc.pod @@ -58,7 +58,7 @@ You may also choose to assign the strings C<'IGNORE'> or C<'DEFAULT'> as the handler, in which case Perl will try to discard the signal or do the default thing. -On most UNIX platforms, the C<CHLD> (sometimes also known as C<CLD>) signal +On most Unix platforms, the C<CHLD> (sometimes also known as C<CLD>) signal has special behavior with respect to a value of C<'IGNORE'>. Setting C<$SIG{CHLD}> to C<'IGNORE'> on such a platform has the effect of not creating zombie processes when the parent process fails to C<wait()> @@ -126,7 +126,7 @@ or even the more elaborate: use POSIX ":sys_wait_h"; sub REAPER { my $child; - while ($child = waitpid(-1,WNOHANG)) { + while (($child = waitpid(-1,WNOHANG)) > 0) { $Kid_Status{$child} = $?; } $SIG{CHLD} = \&REAPER; # still loathe sysV @@ -152,6 +152,10 @@ Here's an example: }; if ($@ and $@ !~ /alarm clock restart/) { die } +If the operation being timed out is system() or qx(), this technique +is liable to generate zombies. 
If this matters to you, you'll +need to do your own fork() and exec(), and kill the errant child process. + For more complex signal handling, you might see the standard POSIX module. Lamentably, this is almost entirely undocumented, but the F<t/lib/posix.t> file from the Perl source distribution has some @@ -276,7 +280,7 @@ same effect as opening a pipe for reading: While this is true on the surface, it's much more efficient to process the file one line or record at a time because then you don't have to read the -whole thing into memory at once. It also gives you finer control of the +whole thing into memory at once. It also gives you finer control of the whole process, letting you to kill off the child process early if you'd like. @@ -307,8 +311,7 @@ To catch it, you could use this: Both the main process and any child processes it forks share the same STDIN, STDOUT, and STDERR filehandles. If both processes try to access -them at once, strange things can happen. You'll certainly want to any -stdio flush output buffers before forking. You may also want to close +them at once, strange things can happen. You may also want to close or reopen the filehandles for the child. You can get around this by opening your pipe with open(), but on some systems this means that the child process cannot outlive the parent. @@ -450,8 +453,8 @@ doesn't actually work: open(PROG_FOR_READING_AND_WRITING, "| some program |") -and if you forget to use the B<-w> flag, then you'll miss out -entirely on the diagnostic message: +and if you forget to use the C<use warnings> pragma or the B<-w> flag, +then you'll miss out entirely on the diagnostic message: Can't do bidirectional pipe at -e line 1. 
@@ -473,7 +476,6 @@ Here's an example of using open2(): use FileHandle; use IPC::Open2; $pid = open2(*Reader, *Writer, "cat -u -n" ); - Writer->autoflush(); # default here, actually print Writer "stuff\n"; $got = <Reader>; @@ -1022,7 +1024,7 @@ something to the server before fetching the server's response. The web server handing the "http" service, which is assumed to be at its standard port, number 80. If your the web server you're trying to connect to is at a different port (like 1080 or 8080), you should specify -as the named-parameter pair, C<PeerPort =E<gt> 8080>. The C<autoflush> +as the named-parameter pair, C<< PeerPort => 8080 >>. The C<autoflush> method is used on the socket because otherwise the system would buffer up the output we sent it. (If you're on a Mac, you'll also need to change every C<"\n"> in your code that sends data over the network to @@ -1140,7 +1142,7 @@ well. As always, setting up a server is little bit more involved than running a client. The model is that the server creates a special kind of socket that does nothing but listen on a particular port for incoming connections. -It does this by calling the C<IO::Socket::INET-E<gt>new()> method with +It does this by calling the C<< IO::Socket::INET->new() >> method with slightly different arguments than the client did. =over @@ -1159,7 +1161,7 @@ server. (Under Unix, ports under 1024 are restricted to the superuser.) In our sample, we'll use port 9000, but you can use any port that's not currently in use on your system. If you try to use one already in used, you'll get an "Address already in use" -message. Under Unix, the C<netstat -a> command will show +message. Under Unix, the C<netstat -a> command will show which services current have servers. =item Listen @@ -1303,16 +1305,16 @@ you weren't wanting it to. Here's a small example showing shared memory usage. 
- use IPC::SysV qw(IPC_PRIVATE IPC_RMID S_IRWXU S_IRWXG S_IRWXO); + use IPC::SysV qw(IPC_PRIVATE IPC_RMID S_IRWXU); $size = 2000; - $key = shmget(IPC_PRIVATE, $size, S_IRWXU|S_IRWXG|S_IRWXO) || die "$!"; - print "shm key $key\n"; + $id = shmget(IPC_PRIVATE, $size, S_IRWXU) || die "$!"; + print "shm key $id\n"; $message = "Message #1"; - shmwrite($key, $message, 0, 60) || die "$!"; + shmwrite($id, $message, 0, 60) || die "$!"; print "wrote: '$message'\n"; - shmread($key, $buff, 0, 60) || die "$!"; + shmread($id, $buff, 0, 60) || die "$!"; print "read : '$buff'\n"; # the buffer of shmread is zero-character end-padded. @@ -1320,16 +1322,16 @@ Here's a small example showing shared memory usage. print "un" unless $buff eq $message; print "swell\n"; - print "deleting shm $key\n"; - shmctl($key, IPC_RMID, 0) || die "$!"; + print "deleting shm $id\n"; + shmctl($id, IPC_RMID, 0) || die "$!"; Here's an example of a semaphore: use IPC::SysV qw(IPC_CREAT); $IPC_KEY = 1234; - $key = semget($IPC_KEY, 10, 0666 | IPC_CREAT ) || die "$!"; - print "shm key $key\n"; + $id = semget($IPC_KEY, 10, 0666 | IPC_CREAT ) || die "$!"; + print "shm key $id\n"; Put this code in a separate file to be run in more than one process. Call the file F<take>: @@ -1337,8 +1339,8 @@ Call the file F<take>: # create a semaphore $IPC_KEY = 1234; - $key = semget($IPC_KEY, 0 , 0 ); - die if !defined($key); + $id = semget($IPC_KEY, 0 , 0 ); + die if !defined($id); $semnum = 0; $semflag = 0; @@ -1346,14 +1348,14 @@ Call the file F<take>: # 'take' semaphore # wait for semaphore to be zero $semop = 0; - $opstring1 = pack("sss", $semnum, $semop, $semflag); + $opstring1 = pack("s!s!s!", $semnum, $semop, $semflag); # Increment the semaphore count $semop = 1; - $opstring2 = pack("sss", $semnum, $semop, $semflag); + $opstring2 = pack("s!s!s!", $semnum, $semop, $semflag); $opstring = $opstring1 . 
$opstring2; - semop($key,$opstring) || die "$!"; + semop($id,$opstring) || die "$!"; Put this code in a separate file to be run in more than one process. Call this file F<give>: @@ -1363,22 +1365,53 @@ Call this file F<give>: # that the second process continues $IPC_KEY = 1234; - $key = semget($IPC_KEY, 0, 0); - die if !defined($key); + $id = semget($IPC_KEY, 0, 0); + die if !defined($id); $semnum = 0; $semflag = 0; # Decrement the semaphore count $semop = -1; - $opstring = pack("sss", $semnum, $semop, $semflag); + $opstring = pack("s!s!s!", $semnum, $semop, $semflag); - semop($key,$opstring) || die "$!"; + semop($id,$opstring) || die "$!"; The SysV IPC code above was written long ago, and it's definitely clunky looking. For a more modern look, see the IPC::SysV module which is included with Perl starting from Perl 5.005. +A small example demonstrating SysV message queues: + + use IPC::SysV qw(IPC_PRIVATE IPC_RMID IPC_CREAT S_IRWXU); + + my $id = msgget(IPC_PRIVATE, IPC_CREAT | S_IRWXU); + + my $sent = "message"; + my $type_sent = 1234; + my $rcvd; + my $type_rcvd; + + if (defined $id) { + if (msgsnd($id, pack("l! a*", $type_sent, $sent), 0)) { + if (msgrcv($id, $rcvd, 60, 0, 0)) { + ($type_rcvd, $rcvd) = unpack("l! a*", $rcvd); + if ($rcvd eq $sent) { + print "okay\n"; + } else { + print "not okay\n"; + } + } else { + die "# msgrcv failed\n"; + } + } else { + die "# msgsnd failed\n"; + } + msgctl($id, IPC_RMID, 0) || die "# msgctl failed: $!\n"; + } else { + die "# msgget failed\n"; + } + =head1 NOTES Most of these routines quietly but politely return C<undef> when they diff --git a/contrib/perl5/pod/perllexwarn.pod b/contrib/perl5/pod/perllexwarn.pod new file mode 100644 index 0000000..cee1687 --- /dev/null +++ b/contrib/perl5/pod/perllexwarn.pod @@ -0,0 +1,433 @@ +=head1 NAME + +perllexwarn - Perl Lexical Warnings + +=head1 DESCRIPTION + +The C<use warnings> pragma is a replacement for both the command line +flag B<-w> and the equivalent Perl variable, C<$^W>. 
+ +The pragma works just like the existing "strict" pragma. +This means that the scope of the warning pragma is limited to the +enclosing block. It also means that the pragma setting will not +leak across files (via C<use>, C<require> or C<do>). This allows +authors to independently define the degree of warning checks that will +be applied to their module. + +By default, optional warnings are disabled, so any legacy code that +doesn't attempt to control the warnings will work unchanged. + +All warnings are enabled in a block by either of these: + + use warnings ; + use warnings 'all' ; + +Similarly all warnings are disabled in a block by either of these: + + no warnings ; + no warnings 'all' ; + +For example, consider the code below: + + use warnings ; + my $a ; + my $b ; + { + no warnings ; + $b = 2 if $a EQ 3 ; + } + $b = 1 if $a NE 3 ; + +The code in the enclosing block has warnings enabled, but the inner +block has them disabled. In this case that means that the use of the C<EQ> +operator won't trip a C<"Use of EQ is deprecated"> warning, but the use of +C<NE> will produce a C<"Use of NE is deprecated"> warning. + +=head2 Default Warnings and Optional Warnings + +Before the introduction of lexical warnings, Perl had two classes of +warnings: mandatory and optional. + +As its name suggests, if your code tripped a mandatory warning, you +would get a warning whether you wanted it or not. +For example, the code below would always produce an C<"isn't numeric"> +warning about the "2:". + + my $a = "2:" + 3; + +With the introduction of lexical warnings, mandatory warnings now become +I<default> warnings. The difference is that although the previously +mandatory warnings are still enabled by default, they can then be +subsequently enabled or disabled with the lexical warning pragma. For +example, in the code below, an C<"isn't numeric"> warning will only +be reported for the C<$a> variable. 
+ + my $a = "2:" + 3; + no warnings ; + my $b = "2:" + 3; + +Note that neither the B<-w> flag nor the C<$^W> variable can be used to +disable/enable default warnings. They are still mandatory in this case. + +=head2 What's wrong with B<-w> and C<$^W> + +Although very useful, the big problem with using B<-w> on the command +line to enable warnings is that it is all or nothing. Take the typical +scenario when you are writing a Perl program. Parts of the code you +will write yourself, but it's very likely that you will make use of +pre-written Perl modules. If you use the B<-w> flag in this case, you +end up enabling warnings in pieces of code that you haven't written. + +Similarly, using C<$^W> to either disable or enable blocks of code is +fundamentally flawed. For a start, say you want to disable warnings in +a block of code. You might expect this to be enough to do the trick: + + { + local ($^W) = 0 ; + my $a =+ 2 ; + my $b ; chop $b ; + } + +When this code is run with the B<-w> flag, a warning will be produced +for the C<$a> line -- C<"Reversed += operator">. + +The problem is that Perl has both compile-time and run-time warnings. To +disable compile-time warnings you need to rewrite the code like this: + + { + BEGIN { $^W = 0 } + my $a =+ 2 ; + my $b ; chop $b ; + } + +The other big problem with C<$^W> is the way you can inadvertently +change the warning setting in unexpected places in your code. For example, +when the code below is run (without the B<-w> flag), the second call +to C<doit> will trip a C<"Use of uninitialized value"> warning, whereas +the first will not. + + sub doit + { + my $b ; chop $b ; + } + + doit() ; + + { + local ($^W) = 1 ; + doit() + } + +This is a side-effect of C<$^W> being dynamically scoped. + +Lexical warnings get around these limitations by allowing finer control +over where warnings can or can't be tripped. 
+ +=head2 Controlling Warnings from the Command Line + +There are three Command Line flags that can be used to control when +warnings are (or aren't) produced: + +=over 5 + +=item B<-w> + +This is the existing flag. If the lexical warnings pragma is B<not> +used in any of your code, or any of the modules that you use, this flag +will enable warnings everywhere. See L<Backward Compatibility> for +details of how this flag interacts with lexical warnings. + +=item B<-W> + +If the B<-W> flag is used on the command line, it will enable all warnings +throughout the program regardless of whether warnings were disabled +locally using C<no warnings> or C<$^W =0>. This includes all files that get +included via C<use>, C<require> or C<do>. +Think of it as the Perl equivalent of the "lint" command. + +=item B<-X> + +Does the exact opposite to the B<-W> flag, i.e. it disables all warnings. + +=back + +=head2 Backward Compatibility + +If you are used to working with a version of Perl prior to the +introduction of lexically scoped warnings, or have code that uses both +lexical warnings and C<$^W>, this section will describe how they interact. + +How Lexical Warnings interact with B<-w>/C<$^W>: + +=over 5 + +=item 1. + +If none of the three command line flags (B<-w>, B<-W> or B<-X>) that +control warnings is used and neither C<$^W> nor the C<warnings> pragma +are used, then default warnings will be enabled and optional warnings +disabled. +This means that legacy code that doesn't attempt to control the warnings +will work unchanged. + +=item 2. + +The B<-w> flag just sets the global C<$^W> variable as in 5.005 -- this +means that any legacy code that currently relies on manipulating C<$^W> +to control warning behavior will still work as is. + +=item 3. + +Apart from now being a boolean, the C<$^W> variable operates in exactly +the same horrible uncontrolled global way, except that it cannot +disable/enable default warnings. + +=item 4. 
+ +If a piece of code is under the control of the C<warnings> pragma, +both the C<$^W> variable and the B<-w> flag will be ignored for the +scope of the lexical warning. + +=item 5. + +The only way to override a lexical warnings setting is with the B<-W> +or B<-X> command line flags. + +=back + +The combined effect of 3 & 4 is that it will allow code which uses +the C<warnings> pragma to control the warning behavior of $^W-type +code (using a C<local $^W=0>) if it really wants to, but not vice-versa. + +=head2 Category Hierarchy + +A hierarchy of "categories" has been defined to allow groups of warnings +to be enabled/disabled in isolation. + +The current hierarchy is: + + all -+ + | + +- chmod + | + +- closure + | + +- exiting + | + +- glob + | + +- io -----------+ + | | + | +- closed + | | + | +- exec + | | + | +- newline + | | + | +- pipe + | | + | +- unopened + | + +- misc + | + +- numeric + | + +- once + | + +- overflow + | + +- pack + | + +- portable + | + +- recursion + | + +- redefine + | + +- regexp + | + +- severe -------+ + | | + | +- debugging + | | + | +- inplace + | | + | +- internal + | | + | +- malloc + | + +- signal + | + +- substr + | + +- syntax -------+ + | | + | +- ambiguous + | | + | +- bareword + | | + | +- deprecated + | | + | +- digit + | | + | +- parenthesis + | | + | +- precedence + | | + | +- printf + | | + | +- prototype + | | + | +- qw + | | + | +- reserved + | | + | +- semicolon + | + +- taint + | + +- umask + | + +- uninitialized + | + +- unpack + | + +- untie + | + +- utf8 + | + +- void + | + +- y2k + +Just like the "strict" pragma any of these categories can be combined + + use warnings qw(void redefine) ; + no warnings qw(io syntax untie) ; + +Also like the "strict" pragma, if there is more than one instance of the +C<warnings> pragma in a given scope the cumulative effect is additive. + + use warnings qw(void) ; # only "void" warnings enabled + ... + use warnings qw(io) ; # only "void" & "io" warnings enabled + ... 
+ no warnings qw(void) ; # only "io" warnings enabled + +To determine which category a specific warning has been assigned to, see +L<perldiag>. + +=head2 Fatal Warnings + +The presence of the word "FATAL" in the category list will escalate any +warnings detected from the categories specified in the lexical scope +into fatal errors. In the code below, there are 3 places where a +deprecated warning will be detected, the middle one will produce a +fatal error. + + + use warnings ; + + $a = 1 if $a EQ $b ; + + { + use warnings FATAL => qw(deprecated) ; + $a = 1 if $a EQ $b ; + } + + $a = 1 if $a EQ $b ; + +=head2 Reporting Warnings from a Module + +The C<warnings> pragma provides a number of functions that are useful for +module authors. These are used when you want to report a module-specific +warning when the calling module has enabled warnings via the C<warnings> +pragma. + +Consider the module C<MyMod::Abc> below. + + package MyMod::Abc; + + use warnings::register; + + sub open { + my $path = shift ; + if (warnings::enabled() && $path !~ m#^/#) { + warnings::warn("changing relative path to /tmp/"); + $path = "/tmp/$path" ; + } + } + + 1 ; + +The call to C<warnings::register> will create a new warnings category +called "MyMod::Abc", i.e. the new category name matches the module +name. The C<open> function in the module will display a warning message +if it gets given a relative path as a parameter. This warning will only +be displayed if the code that uses C<MyMod::Abc> has actually enabled +it with the C<warnings> pragma like below. + + use MyMod::Abc; + use warnings 'MyMod::Abc'; + ... + MyMod::Abc::open("../fred.txt"); + +It is also possible to test whether the pre-defined warnings categories are +set in the calling module with the C<warnings::enabled> function.
Consider +this snippet of code: + + package MyMod::Abc; + + sub open { + if (warnings::enabled("deprecated")) { + warnings::warn("deprecated", + "open is deprecated, use new instead") ; + } + new(@_) ; + } + + sub new + ... + 1 ; + +The function C<open> has been deprecated, so code has been included to +display a warning message whenever the calling module has (at least) the +"deprecated" warnings category enabled. Something like this, say. + + use warnings 'deprecated'; + use MyMod::Abc; + ... + MyMod::Abc::open($filename) ; + +The C<warnings::warn> function should be used to actually display the +warning message. This is because it can make use of the feature that +allows warnings to be escalated into fatal errors. So in this case + + use MyMod::Abc; + use warnings FATAL => 'MyMod::Abc'; + ... + MyMod::Abc::open('../fred.txt'); + +the C<warnings::warn> function will detect this and die after +displaying the warning message. + +=head1 TODO + + perl5db.pl + The debugger saves and restores C<$^W> at runtime. I haven't checked + whether the debugger will still work with the lexical warnings + patch applied. + + diagnostics.pm + I *think* I've got diagnostics to work with the lexical warnings + patch, but there were design decisions made in diagnostics to work + around the limitations of C<$^W>. Now that those limitations are gone, + the module should be revisited. + +=head1 SEE ALSO + +L<warnings>, L<perldiag>. + +=head1 AUTHOR + +Paul Marquess diff --git a/contrib/perl5/pod/perllocale.pod b/contrib/perl5/pod/perllocale.pod index 08b50e0..be37385 100644 --- a/contrib/perl5/pod/perllocale.pod +++ b/contrib/perl5/pod/perllocale.pod @@ -309,7 +309,7 @@ discussed above. We decided to try that instead of the above faulty locale "En_US"--and in Cshish shells (B<csh>, B<tcsh>) setenv LC_ALL en_US.ISO8859-1 - + If you do not know what shell you have, consult your local helpdesk or the equivalent.
@@ -332,9 +332,9 @@ Second, if using the listed commands you see something B<exactly> (prefix matches do not count and case usually counts) like "En_US" without the quotes, then you should be okay because you are using a locale name that should be installed and available in your system. -In this case, see L<Permanently fixing system locale configuration>. +In this case, see L<Permanently fixing your system's locale configuration>. -=head2 Permanently fixing your locale configuration +=head2 Permanently fixing your system's locale configuration This is when you see something like: @@ -608,8 +608,12 @@ obeys the current C<LC_TIME> locale. The remaining locale category, C<LC_MESSAGES> (possibly supplemented by others in particular implementations) is not currently used by -Perl--except possibly to affect the behavior of library functions called -by extensions outside the standard Perl distribution. +Perl--except possibly to affect the behavior of library functions +called by extensions outside the standard Perl distribution and by the +operating system and its utilities. Note especially that the string +value of C<$!> and the error messages given by external utilities may +be changed by C<LC_MESSAGES>. If you want to have portable error +codes, use C<%!>. See L<Errno>. =head1 SECURITY @@ -637,11 +641,12 @@ case-mapping table is in effect. =item * -If the decimal point character in the C<LC_NUMERIC> locale is -surreptitiously changed from a dot to a comma, C<sprintf("%g", -0.123456e3)> produces a string result of "123,456". Many people would -interpret this as one hundred and twenty-three thousand, four hundred -and fifty-six. +Some systems are broken in that they allow the "C" locale to be +overridden by users. If the decimal point character in the +C<LC_NUMERIC> category of the "C" locale is surreptitiously changed +from a dot to a comma, C<sprintf("%g", 0.123456e3)> produces a +string result of "123,456". 
Many people would interpret this as +one hundred and twenty-three thousand, four hundred and fifty-six. =item * @@ -710,10 +715,6 @@ if modified as a result of a substitution based on a regular expression match involving C<\w>, C<\W>, C<\s>, or C<\S>; or of case-mapping with C<\l>, C<\L>,C<\u> or C<\U>. -=item B<In-memory formatting function> (sprintf()): - -Result is tainted if C<use locale> is in effect. - =item B<Output formatting functions> (printf() and write()): Success/failure result is never tainted. diff --git a/contrib/perl5/pod/perllol.pod b/contrib/perl5/pod/perllol.pod index 56f08c2..f015a20 100644 --- a/contrib/perl5/pod/perllol.pod +++ b/contrib/perl5/pod/perllol.pod @@ -1,62 +1,62 @@ =head1 NAME -perlLoL - Manipulating Lists of Lists in Perl +perllol - Manipulating Arrays of Arrays in Perl =head1 DESCRIPTION -=head1 Declaration and Access of Lists of Lists +=head1 Declaration and Access of Arrays of Arrays -The simplest thing to build is a list of lists (sometimes called an array -of arrays). It's reasonably easy to understand, and almost everything -that applies here will also be applicable later on with the fancier data -structures. +The simplest thing to build is an array of arrays (sometimes imprecisely +called a list of lists). It's reasonably easy to understand, and +almost everything that applies here will also be applicable later +on with the fancier data structures. -A list of lists, or an array of an array if you would, is just a regular -old array @LoL that you can get at with two subscripts, like C<$LoL[3][2]>. Here's -a declaration of the array: +An array of an array is just a regular old array @AoA that you can +get at with two subscripts, like C<$AoA[3][2]>.
Here's a declaration +of the array: - # assign to our array a list of list references - @LoL = ( + # assign to our array, an array of array references + @AoA = ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], [ "homer", "marge", "bart" ], ); - print $LoL[2][2]; + print $AoA[2][2]; bart Now you should be very careful that the outer bracket type is a round one, that is, a parenthesis. That's because you're assigning to -an @list, so you need parentheses. If you wanted there I<not> to be an @LoL, +an @array, so you need parentheses. If you wanted there I<not> to be an @AoA, but rather just a reference to it, you could do something more like this: - # assign a reference to list of list references - $ref_to_LoL = [ + # assign a reference to array of array references + $ref_to_AoA = [ [ "fred", "barney", "pebbles", "bambam", "dino", ], [ "homer", "bart", "marge", "maggie", ], [ "george", "jane", "elroy", "judy", ], ]; - print $ref_to_LoL->[2][2]; + print $ref_to_AoA->[2][2]; Notice that the outer bracket type has changed, and so our access syntax has also changed. That's because unlike C, in perl you can't freely -interchange arrays and references thereto. $ref_to_LoL is a reference to an -array, whereas @LoL is an array proper. Likewise, C<$LoL[2]> is not an +interchange arrays and references thereto. $ref_to_AoA is a reference to an +array, whereas @AoA is an array proper. Likewise, C<$AoA[2]> is not an array, but an array ref. So how come you can write these: - $LoL[2][2] - $ref_to_LoL->[2][2] + $AoA[2][2] + $ref_to_AoA->[2][2] instead of having to write these: - $LoL[2]->[2] - $ref_to_LoL->[2]->[2] + $AoA[2]->[2] + $ref_to_AoA->[2]->[2] Well, that's because the rule is that on adjacent brackets only (whether square or curly), you are free to omit the pointer dereferencing arrow. But you cannot do so for the very first one if it's a scalar containing -a reference, which means that $ref_to_LoL always needs it. 
+a reference, which means that $ref_to_AoA always needs it. =head1 Growing Your Own @@ -67,81 +67,81 @@ it up entirely from scratch? First, let's look at reading it in from a file. This is something like adding a row at a time. We'll assume that there's a flat file in which each line is a row and each word an element. If you're trying to develop an -@LoL list containing all these, here's the right way to do that: +@AoA array containing all these, here's the right way to do that: while (<>) { @tmp = split; - push @LoL, [ @tmp ]; + push @AoA, [ @tmp ]; } You might also have loaded that from a function: for $i ( 1 .. 10 ) { - $LoL[$i] = [ somefunc($i) ]; + $AoA[$i] = [ somefunc($i) ]; } Or you might have had a temporary variable sitting around with the -list in it. +array in it. for $i ( 1 .. 10 ) { @tmp = somefunc($i); - $LoL[$i] = [ @tmp ]; + $AoA[$i] = [ @tmp ]; } -It's very important that you make sure to use the C<[]> list reference +It's very important that you make sure to use the C<[]> array reference constructor. That's because this will be very wrong: - $LoL[$i] = @tmp; + $AoA[$i] = @tmp; -You see, assigning a named list like that to a scalar just counts the +You see, assigning a named array like that to a scalar just counts the number of elements in @tmp, which probably isn't what you want. If you are running under C<use strict>, you'll have to add some declarations to make it happy: use strict; - my(@LoL, @tmp); + my(@AoA, @tmp); while (<>) { @tmp = split; - push @LoL, [ @tmp ]; + push @AoA, [ @tmp ]; } Of course, you don't need the temporary array to have a name at all: while (<>) { - push @LoL, [ split ]; + push @AoA, [ split ]; } You also don't have to use push(). You could just make a direct assignment if you knew where you wanted to put it: - my (@LoL, $i, $line); + my (@AoA, $i, $line); for $i ( 0 .. 10 ) { $line = <>; - $LoL[$i] = [ split ' ', $line ]; + $AoA[$i] = [ split ' ', $line ]; } or even just - my (@LoL, $i); + my (@AoA, $i); for $i ( 0 .. 
10 ) { - $LoL[$i] = [ split ' ', <> ]; + $AoA[$i] = [ split ' ', <> ]; } -You should in general be leery of using potential list functions -in a scalar context without explicitly stating such. -This would be clearer to the casual reader: +You should in general be leery of using functions that could +potentially return lists in scalar context without explicitly stating +such. This would be clearer to the casual reader: - my (@LoL, $i); + my (@AoA, $i); for $i ( 0 .. 10 ) { - $LoL[$i] = [ split ' ', scalar(<>) ]; + $AoA[$i] = [ split ' ', scalar(<>) ]; } -If you wanted to have a $ref_to_LoL variable as a reference to an array, +If you wanted to have a $ref_to_AoA variable as a reference to an array, you'd have to do something like this: while (<>) { - push @$ref_to_LoL, [ split ]; + push @$ref_to_AoA, [ split ]; } Now you can add new rows. What about adding new columns? If you're @@ -149,12 +149,12 @@ dealing with just matrices, it's often easiest to use simple assignment: for $x (1 .. 10) { for $y (1 .. 10) { - $LoL[$x][$y] = func($x, $y); + $AoA[$x][$y] = func($x, $y); } } for $x ( 3, 7, 9 ) { - $LoL[$x][20] += func2($x); + $AoA[$x][20] += func2($x); } It doesn't matter whether those elements are already @@ -165,11 +165,11 @@ If you wanted just to append to a row, you'd have to do something a bit funnier looking: # add new columns to an existing row - push @{ $LoL[0] }, "wilma", "betty"; + push @{ $AoA[0] }, "wilma", "betty"; Notice that I I<couldn't> say just: - push $LoL[0], "wilma", "betty"; # WRONG! + push $AoA[0], "wilma", "betty"; # WRONG! In fact, that wouldn't even compile. How come? Because the argument to push() must be a real array, not just a reference to such. @@ -180,12 +180,12 @@ Now it's time to print your data structure out. How are you going to do that? 
Well, if you want only one of the elements, it's trivial: - print $LoL[0][0]; + print $AoA[0][0]; If you want to print the whole thing, though, you can't say - print @LoL; # WRONG + print @AoA; # WRONG because you'll get just references listed, and perl will never automatically dereference things for you. Instead, you have to @@ -193,41 +193,41 @@ roll yourself a loop or two. This prints the whole structure, using the shell-style for() construct to loop across the outer set of subscripts. - for $aref ( @LoL ) { + for $aref ( @AoA ) { print "\t [ @$aref ],\n"; } If you wanted to keep track of subscripts, you might do this: - for $i ( 0 .. $#LoL ) { - print "\t elt $i is [ @{$LoL[$i]} ],\n"; + for $i ( 0 .. $#AoA ) { + print "\t elt $i is [ @{$AoA[$i]} ],\n"; } or maybe even this. Notice the inner loop. - for $i ( 0 .. $#LoL ) { - for $j ( 0 .. $#{$LoL[$i]} ) { - print "elt $i $j is $LoL[$i][$j]\n"; + for $i ( 0 .. $#AoA ) { + for $j ( 0 .. $#{$AoA[$i]} ) { + print "elt $i $j is $AoA[$i][$j]\n"; } } As you can see, it's getting a bit complicated. That's why sometimes is easier to take a temporary on your way through: - for $i ( 0 .. $#LoL ) { - $aref = $LoL[$i]; + for $i ( 0 .. $#AoA ) { + $aref = $AoA[$i]; for $j ( 0 .. $#{$aref} ) { - print "elt $i $j is $LoL[$i][$j]\n"; + print "elt $i $j is $AoA[$i][$j]\n"; } } Hmm... that's still a bit ugly. How about this: - for $i ( 0 .. $#LoL ) { - $aref = $LoL[$i]; + for $i ( 0 .. $#AoA ) { + $aref = $AoA[$i]; $n = @$aref - 1; for $j ( 0 .. $n ) { - print "elt $i $j is $LoL[$i][$j]\n"; + print "elt $i $j is $AoA[$i][$j]\n"; } } @@ -240,49 +240,49 @@ pointer arrow for dereferencing, no such convenience exists for slices. (Remember, of course, that you can always write a loop to do a slice operation.) -Here's how to do one operation using a loop. We'll assume an @LoL +Here's how to do one operation using a loop. We'll assume an @AoA variable as before. 
@part = (); $x = 4; for ($y = 7; $y < 13; $y++) { - push @part, $LoL[$x][$y]; + push @part, $AoA[$x][$y]; } That same loop could be replaced with a slice operation: - @part = @{ $LoL[4] } [ 7..12 ]; + @part = @{ $AoA[4] } [ 7..12 ]; but as you might well imagine, this is pretty rough on the reader. Ah, but what if you wanted a I<two-dimensional slice>, such as having $x run from 4..8 and $y run from 7 to 12? Hmm... here's the simple way: - @newLoL = (); + @newAoA = (); for ($startx = $x = 4; $x <= 8; $x++) { for ($starty = $y = 7; $y <= 12; $y++) { - $newLoL[$x - $startx][$y - $starty] = $LoL[$x][$y]; + $newAoA[$x - $startx][$y - $starty] = $AoA[$x][$y]; } } We can reduce some of the looping through slices for ($x = 4; $x <= 8; $x++) { - push @newLoL, [ @{ $LoL[$x] } [ 7..12 ] ]; + push @newAoA, [ @{ $AoA[$x] } [ 7..12 ] ]; } If you were into Schwartzian Transforms, you would probably have selected map for that - @newLoL = map { [ @{ $LoL[$_] } [ 7..12 ] ] } 4 .. 8; + @newAoA = map { [ @{ $AoA[$_] } [ 7..12 ] ] } 4 .. 8; Although if your manager accused of seeking job security (or rapid insecurity) through inscrutable code, it would be hard to argue. :-) If I were you, I'd put that in a function: - @newLoL = splice_2D( \@LoL, 4 => 8, 7 => 12 ); + @newAoA = splice_2D( \@AoA, 4 => 8, 7 => 12 ); sub splice_2D { - my $lrr = shift; # ref to list of list refs! + my $lrr = shift; # ref to array of array refs! my ($x_lo, $x_hi, $y_lo, $y_hi) = @_; diff --git a/contrib/perl5/pod/perlmod.pod b/contrib/perl5/pod/perlmod.pod index 48ebf23..63324a4 100644 --- a/contrib/perl5/pod/perlmod.pod +++ b/contrib/perl5/pod/perlmod.pod @@ -6,25 +6,27 @@ perlmod - Perl modules (packages and symbol tables) =head2 Packages -Perl provides a mechanism for alternative namespaces to protect packages -from stomping on each other's variables. In fact, there's really no such -thing as a global variable in Perl (although some identifiers default -to the main package instead of the current one). 
The package statement -declares the compilation unit as -being in the given namespace. The scope of the package declaration -is from the declaration itself through the end of the enclosing block, -C<eval>, C<sub>, or end of file, whichever comes first (the same scope -as the my() and local() operators). All further unqualified dynamic -identifiers will be in this namespace. A package statement only affects -dynamic variables--including those you've used local() on--but -I<not> lexical variables created with my(). Typically it would be -the first declaration in a file to be included by the C<require> or -C<use> operator. You can switch into a package in more than one place; -it merely influences which symbol table is used by the compiler for the -rest of that block. You can refer to variables and filehandles in other -packages by prefixing the identifier with the package name and a double -colon: C<$Package::Variable>. If the package name is null, the C<main> -package is assumed. That is, C<$::sail> is equivalent to C<$main::sail>. +Perl provides a mechanism for alternative namespaces to protect +packages from stomping on each other's variables. In fact, there's +really no such thing as a global variable in Perl. The package +statement declares the compilation unit as being in the given +namespace. The scope of the package declaration is from the +declaration itself through the end of the enclosing block, C<eval>, +or file, whichever comes first (the same scope as the my() and +local() operators). Unqualified dynamic identifiers will be in +this namespace, except for those few identifiers that, if unqualified, +default to the main package instead of the current one as described +below. A package statement affects only dynamic variables--including +those you've used local() on--but I<not> lexical variables created +with my(). Typically it would be the first declaration in a file +included by the C<do>, C<require>, or C<use> operators.
You can +switch into a package in more than one place; it merely influences +which symbol table is used by the compiler for the rest of that +block. You can refer to variables and filehandles in other packages +by prefixing the identifier with the package name and a double +colon: C<$Package::Variable>. If the package name is null, the +C<main> package is assumed. That is, C<$::sail> is equivalent to +C<$main::sail>. The old package delimiter was a single quote, but double colon is now the preferred delimiter, in part because it's more readable to humans, and @@ -37,35 +39,38 @@ C<"This is $owner's house">, you'll be accessing C<$owner::s>; that is, the $s variable in package C<owner>, which is probably not what you meant. Use braces to disambiguate, as in C<"This is ${owner}'s house">. -Packages may be nested inside other packages: C<$OUTER::INNER::var>. This -implies nothing about the order of name lookups, however. All symbols +Packages may themselves contain package separators, as in +C<$OUTER::INNER::var>. This implies nothing about the order of +name lookups, however. There are no relative packages: all symbols are either local to the current package, or must be fully qualified from the outer package name down. For instance, there is nowhere -within package C<OUTER> that C<$INNER::var> refers to C<$OUTER::INNER::var>. -It would treat package C<INNER> as a totally separate global package. - -Only identifiers starting with letters (or underscore) are stored in a -package's symbol table. All other symbols are kept in package C<main>, -including all of the punctuation variables like $_. In addition, when -unqualified, the identifiers STDIN, STDOUT, STDERR, ARGV, ARGVOUT, ENV, -INC, and SIG are forced to be in package C<main>, even when used for other -purposes than their builtin one. 
Note also that, if you have a package -called C<m>, C<s>, or C<y>, then you can't use the qualified form of an -identifier because it will be interpreted instead as a pattern match, -a substitution, or a transliteration. - -(Variables beginning with underscore used to be forced into package +within package C<OUTER> that C<$INNER::var> refers to +C<$OUTER::INNER::var>. It would treat package C<INNER> as a totally +separate global package. + +Only identifiers starting with letters (or underscore) are stored +in a package's symbol table. All other symbols are kept in package +C<main>, including all punctuation variables, like $_. In addition, +when unqualified, the identifiers STDIN, STDOUT, STDERR, ARGV, +ARGVOUT, ENV, INC, and SIG are forced to be in package C<main>, +even when used for other purposes than their built-in one. If you +have a package called C<m>, C<s>, or C<y>, then you can't use the +qualified form of an identifier because it would be instead interpreted +as a pattern match, a substitution, or a transliteration. + +Variables beginning with underscore used to be forced into package main, but we decided it was more useful for package writers to be able to use leading underscore to indicate private variables and method names. -$_ is still global though.) +$_ is still global though. See also L<perlvar/"Technical Note on the +Syntax of Variable Names">. -Eval()ed strings are compiled in the package in which the eval() was +C<eval>ed strings are compiled in the package in which the eval() was compiled. (Assignments to C<$SIG{}>, however, assume the signal handler specified is in the C<main> package. Qualify the signal handler name if you wish to have a signal handler in a package.) For an example, examine F<perldb.pl> in the Perl library. It initially switches to the C<DB> package so that the debugger doesn't interfere with variables -in the script you are trying to debug. At various points, however, it +in the program you are trying to debug. 
At various points, however, it temporarily switches back to the C<main> package to evaluate various expressions in the context of the C<main> package (or wherever you came from). See L<perldebug>. @@ -92,8 +97,8 @@ table lookups at compile time: local $main::{foo} = $main::{bar}; You can use this to print out all the variables in a package, for -instance. The standard F<dumpvar.pl> library and the CPAN module -Devel::Symdump make use of this. +instance. The standard but antiquated F<dumpvar.pl> library and +the CPAN module Devel::Symdump make use of this. Assignment to a typeglob performs an aliasing operation, i.e., @@ -102,7 +107,7 @@ Assignment to a typeglob performs an aliasing operation, i.e., causes variables, subroutines, formats, and file and directory handles accessible via the identifier C<richard> also to be accessible via the identifier C<dick>. If you want to alias only a particular variable or -subroutine, you can assign a reference instead: +subroutine, assign a reference instead: *dick = \$richard; @@ -130,7 +135,7 @@ is a somewhat tricky way of passing around references cheaply when you won't want to have to remember to dereference variables explicitly. -Another use of symbol tables is for making "constant" scalars. +Another use of symbol tables is for making "constant" scalars. *PI = \3.14159265358979; @@ -157,14 +162,59 @@ This prints You gave me main::foo You gave me bar::baz -The *foo{THING} notation can also be used to obtain references to the +The C<*foo{THING}> notation can also be used to obtain references to the individual elements of *foo, see L<perlref>. +Subroutine definitions (and declarations, for that matter) need +not necessarily be situated in the package whose symbol table they +occupy. You can define a subroutine outside its package by +explicitly qualifying the name of the subroutine: + + package main; + sub Some_package::foo { ... 
} # &foo defined in Some_package + +This is just a shorthand for a typeglob assignment at compile time: + + BEGIN { *Some_package::foo = sub { ... } } + +and is I<not> the same as writing: + + { + package Some_package; + sub foo { ... } + } + +In the first two versions, the body of the subroutine is +lexically in the main package, I<not> in Some_package. So +something like this: + + package main; + + $Some_package::name = "fred"; + $main::name = "barney"; + + sub Some_package::foo { + print "in ", __PACKAGE__, ": \$name is '$name'\n"; + } + + Some_package::foo(); + +prints: + + in main: $name is 'barney' + +rather than: + + in Some_package: $name is 'fred' + +This also has implications for the use of the SUPER:: qualifier +(see L<perlobj>). + =head2 Package Constructors and Destructors -There are two special subroutine definitions that function as package -constructors and destructors. These are the C<BEGIN> and C<END> -routines. The C<sub> is optional for these routines. +Four special subroutines act as package constructors and destructors. +These are the C<BEGIN>, C<CHECK>, C<INIT>, and C<END> routines. The +C<sub> is optional for these routines. A C<BEGIN> subroutine is executed as soon as possible, that is, the moment it is completely defined, even before the rest of the containing file @@ -175,55 +225,69 @@ files in time to be visible to the rest of the file. Once a C<BEGIN> has run, it is immediately undefined and any code it used is returned to Perl's memory pool. This means you can't ever explicitly call a C<BEGIN>. -An C<END> subroutine is executed as late as possible, that is, when -the interpreter is being exited, even if it is exiting as a result of -a die() function. (But not if it's polymorphing into another program -via C<exec>, or being blown out of the water by a signal--you have to -trap that yourself (if you can).) 
You may have multiple C<END> blocks -within a file--they will execute in reverse order of definition; that is: -last in, first out (LIFO). - -Inside an C<END> subroutine, C<$?> contains the value that the script is +An C<END> subroutine is executed as late as possible, that is, after +perl has finished running the program and just before the interpreter +is being exited, even if it is exiting as a result of a die() function. +(But not if it's polymorphing into another program via C<exec>, or +being blown out of the water by a signal--you have to trap that yourself +(if you can).) You may have multiple C<END> blocks within a file--they +will execute in reverse order of definition; that is: last in, first +out (LIFO). C<END> blocks are not executed when you run perl with the +C<-c> switch. + +Inside an C<END> subroutine, C<$?> contains the value that the program is going to pass to C<exit()>. You can modify C<$?> to change the exit -value of the script. Beware of changing C<$?> by accident (e.g. by +value of the program. Beware of changing C<$?> by accident (e.g. by running something via C<system>). -Note that when you use the B<-n> and B<-p> switches to Perl, C<BEGIN> and +Similar to C<BEGIN> blocks, C<INIT> blocks are run just before the +Perl runtime begins execution, in "first in, first out" (FIFO) order. +For example, the code generators documented in L<perlcc> make use of +C<INIT> blocks to initialize and resolve pointers to XSUBs. + +Similar to C<END> blocks, C<CHECK> blocks are run just after the +Perl compile phase ends and before the run time begins, in +LIFO order. C<CHECK> blocks are again useful in the Perl compiler +suite to save the compiled state of the program. + +When you use the B<-n> and B<-p> switches to Perl, C<BEGIN> and C<END> work just as they do in B<awk>, as a degenerate case. 
As currently implemented (and subject to change, since its inconvenient at best), -both C<BEGIN> I<and> C<END> blocks are run when you use the B<-c> switch +both C<BEGIN> and C<END> blocks are run when you use the B<-c> switch for a compile-only syntax check, although your main code is not. =head2 Perl Classes -There is no special class syntax in Perl, but a package may function +There is no special class syntax in Perl, but a package may act as a class if it provides subroutines to act as methods. Such a package may also derive some of its methods from another class (package) -by listing the other package name in its global @ISA array (which +by listing the other package name(s) in its global @ISA array (which must be a package global, not a lexical). For more on this, see L<perltoot> and L<perlobj>. =head2 Perl Modules -A module is just a package that is defined in a library file of -the same name, and is designed to be reusable. It may do this by -providing a mechanism for exporting some of its symbols into the symbol -table of any package using it. Or it may function as a class -definition and make its semantics available implicitly through method -calls on the class and its objects, without explicit exportation of any -symbols. Or it can do a little of both. +A module is just a set of related functions in a library file, i.e., a Perl +package with the same name as the file. It is specifically designed +to be reusable by other modules or programs. It may do this by +providing a mechanism for exporting some of its symbols into the +symbol table of any package using it. Or it may function as a class +definition and make its semantics available implicitly through +method calls on the class and its objects, without explicitly +exporting anything. Or it can do a little of both.
-For example, to start a normal module called Some::Module, create -a file called Some/Module.pm and start with this template: +For example, to start a traditional, non-OO module called Some::Module, +create a file called F<Some/Module.pm> and start with this template: package Some::Module; # assumes Some/Module.pm use strict; + use warnings; BEGIN { use Exporter (); - use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); # set the version for version checking $VERSION = 1.00; @@ -238,10 +302,11 @@ a file called Some/Module.pm and start with this template: # as well as any optionally exported functions @EXPORT_OK = qw($Var1 %Hashit &func3); } - use vars @EXPORT_OK; + our @EXPORT_OK; # non-exported package globals go here - use vars qw(@more $stuff); + our @more; + our $stuff; # initialize package globals, first exported ones $Var1 = ''; @@ -275,10 +340,13 @@ a file called Some/Module.pm and start with this template: END { } # module clean-up code here (global destructor) -Then go on to declare and use your variables in functions -without any qualifications. -See L<Exporter> and the L<perlmodlib> for details on -mechanics and style issues in module creation. + ## YOUR CODE GOES HERE + + 1; # don't forget to return a true value from the file + +Then go on to declare and use your variables in functions without +any qualifications. See L<Exporter> and the L<perlmodlib> for +details on mechanics and style issues in module creation. Perl modules are included into your program by saying @@ -304,12 +372,13 @@ is exactly equivalent to BEGIN { require Module; } -All Perl module files have the extension F<.pm>. C<use> assumes this so -that you don't have to spell out "F<Module.pm>" in quotes. This also -helps to differentiate new modules from old F<.pl> and F<.ph> files. 
-Module names are also capitalized unless they're functioning as pragmas, -"Pragmas" are in effect compiler directives, and are sometimes called -"pragmatic modules" (or even "pragmata" if you're a classicist). +All Perl module files have the extension F<.pm>. The C<use> operator +assumes this so you don't have to spell out "F<Module.pm>" in quotes. +This also helps to differentiate new modules from old F<.pl> and +F<.ph> files. Module names are also capitalized unless they're +functioning as pragmas; pragmas are in effect compiler directives, +and are sometimes called "pragmatic modules" (or even "pragmata" +if you're a classicist). The two statements: @@ -319,18 +388,19 @@ The two statements: differ from each other in two ways. In the first case, any double colons in the module name, such as C<Some::Module>, are translated into your system's directory separator, usually "/". The second -case does not, and would have to be specified literally. The other difference -is that seeing the first C<require> clues in the compiler that uses of -indirect object notation involving "SomeModule", as in C<$ob = purge SomeModule>, -are method calls, not function calls. (Yes, this really can make a difference.) - -Because the C<use> statement implies a C<BEGIN> block, the importation -of semantics happens at the moment the C<use> statement is compiled, +case does not, and would have to be specified literally. The other +difference is that seeing the first C<require> clues in the compiler +that uses of indirect object notation involving "SomeModule", as +in C<$ob = purge SomeModule>, are method calls, not function calls. +(Yes, this really can make a difference.) + +Because the C<use> statement implies a C<BEGIN> block, the importing +of semantics happens as soon as the C<use> statement is compiled, before the rest of the file is compiled. 
This is how it is able to function as a pragma mechanism, and also how modules are able to -declare subroutines that are then visible as list operators for +declare subroutines that are then visible as list or unary operators for the rest of the current file. This will not work if you use C<require> -instead of C<use>. With require you can get into this problem: +instead of C<use>. With C<require> you can get into this problem: require Cwd; # make Cwd:: accessible $here = Cwd::getcwd(); @@ -354,22 +424,22 @@ filenames on some systems. Therefore, if a module's name is, say, C<Text::Soundex>, then its definition is actually found in the library file F<Text/Soundex.pm>. -Perl modules always have a F<.pm> file, but there may also be dynamically -linked executables or autoloaded subroutine definitions associated with -the module. If so, these will be entirely transparent to the user of -the module. It is the responsibility of the F<.pm> file to load (or -arrange to autoload) any additional functionality. The POSIX module -happens to do both dynamic loading and autoloading, but the user can -say just C<use POSIX> to get it all. - -For more information on writing extension modules, see L<perlxstut> -and L<perlguts>. +Perl modules always have a F<.pm> file, but there may also be +dynamically linked executables (often ending in F<.so>) or autoloaded +subroutine definitions (often ending in F<.al>) associated with the +module. If so, these will be entirely transparent to the user of +the module. It is the responsibility of the F<.pm> file to load +(or arrange to autoload) any additional functionality. For example, +although the POSIX module happens to do both dynamic loading and +autoloading, the user can say just C<use POSIX> to get it all.
=head1 SEE ALSO See L<perlmodlib> for general style issues related to building Perl -modules and classes as well as descriptions of the standard library and -CPAN, L<Exporter> for how Perl's standard import/export mechanism works, -L<perltoot> for an in-depth tutorial on creating classes, L<perlobj> -for a hard-core reference document on objects, and L<perlsub> for an -explanation of functions and scoping. +modules and classes, as well as descriptions of the standard library +and CPAN, L<Exporter> for how Perl's standard import/export mechanism +works, L<perltoot> and L<perltootc> for an in-depth tutorial on +creating classes, L<perlobj> for a hard-core reference document on +objects, L<perlsub> for an explanation of functions and scoping, +and L<perlxstut> and L<perlguts> for more information on writing +extension modules. diff --git a/contrib/perl5/pod/perlmodinstall.pod b/contrib/perl5/pod/perlmodinstall.pod index b6176f0..19ffac9 100644 --- a/contrib/perl5/pod/perlmodinstall.pod +++ b/contrib/perl5/pod/perlmodinstall.pod @@ -5,21 +5,23 @@ perlmodinstall - Installing CPAN Modules =head1 DESCRIPTION You can think of a module as the fundamental unit of reusable Perl -code; see L<perlmod> for details. Whenever anyone creates a chunk of -Perl code that they think will be useful to the world, they register -as a Perl developer at http://www.perl.com/CPAN/modules/04pause.html -so that they can then upload their code to the CPAN. The CPAN is the -Comprehensive Perl Archive Network and can be accessed at -http://www.perl.com/CPAN/. +code; see L<perlmod> for details. Whenever anyone creates a chunk +of Perl code that they think will be useful to the world, they +register as a Perl developer at +http://www.perl.com/CPAN/modules/04pause.html so that they can then +upload their code to CPAN.
CPAN is the Comprehensive Perl Archive +Network and can be accessed at http://www.perl.com/CPAN/, or searched +via http://cpan.perl.com/ and +http://theory.uwinnipeg.ca/mod_perl/cpan-search.pl . This documentation is for people who want to download CPAN modules and install them on their own computer. =head2 PREAMBLE -You have a file ending in .tar.gz (or, less often, .zip). You know -there's a tasty module inside. There are four steps you must now -take: +You have a file ending in F<.tar.gz> (or, less often, F<.zip>). +You know there's a tasty module inside. You must now take four +steps: =over 5 @@ -44,8 +46,8 @@ say C<perl Makefile.PL>, you can substitute C<perl Makefile.PL PREFIX=/my/perl_directory> to install the modules into C</my/perl_directory>. Then you can use the modules from your Perl programs with C<use lib -"/my/perl_directory/lib/site_perl";> or sometimes just C<use -"/my/perl_directory";>. +"/my/perl_directory/lib/site_perl"> or sometimes just C<use +"/my/perl_directory">. =over 4 @@ -54,7 +56,8 @@ from your Perl programs with C<use lib B<If you're on Unix,> You can use Andreas Koenig's CPAN module -( http://www.perl.com/CPAN/modules/by-module/CPAN ) +(which comes standard with Perl, or can itself be downloaded +from http://www.perl.com/CPAN/modules/by-module/CPAN) to automate the following steps, from DECOMPRESS through INSTALL. A. DECOMPRESS @@ -85,12 +88,17 @@ While still in that directory, type: make install -Make sure you have the appropriate permissions to install the module +Make sure you have appropriate permissions to install the module in your Perl 5 library directory. Often, you'll need to be root. +Perl maintains a record of all module installations. To look at +this list, simply type: + + perldoc perllocal + That's all you need to do on Unix systems with dynamic linking. 
-Most Unix systems have dynamic linking -- if yours doesn't, or if for -another reason you have a statically-linked perl, B<and> the +Most Unix systems have dynamic linking--if yours doesn't, or if for +another reason you have a statically-linked perl, I<and> the module requires compilation, you'll need to build a new Perl binary that includes the module. Again, you'll probably need to be root. @@ -100,7 +108,7 @@ B<If you're running Windows 95 or NT with the ActiveState port of Perl> A. DECOMPRESS -You can use the shareware Winzip ( http://www.winzip.com ) to +You can use the shareware B<Winzip> program ( http://www.winzip.com ) to decompress and unpack modules. B. UNPACK @@ -112,7 +120,7 @@ If you used WinZip, this was already done for you. Does the module require compilation (i.e. does it have files that end in .xs, .c, .h, .y, .cc, .cxx, or .C)? If it does, you're on your own. You can try compiling it yourself if you have a C compiler. -If you're successful, consider uploading the resulting binary to the +If you're successful, consider uploading the resulting binary to CPAN for others to use. If it doesn't, go to INSTALL. D. INSTALL @@ -129,17 +137,16 @@ B<If you're running Windows 95 or NT with the core Windows distribution of Perl, A. DECOMPRESS When you download the module, make sure it ends in either -C<.tar.gz> or C<.zip>. Windows browsers sometimes +F<.tar.gz> or F<.zip>. Windows browsers sometimes download C<.tar.gz> files as C<_tar.tar>, because early versions of Windows prohibited more than one dot in a filename. -You can use the shareware WinZip ( http://www.winzip.com ) to +You can use the shareware B<WinZip> program ( http://www.winzip.com ) to decompress and unpack modules. Or, you can use InfoZip's C<unzip> utility ( -http://www.cdrom.com/pub/infozip/Info-Zip.html ) to uncompress -C<.zip> files; type C<unzip yourmodule.zip> in -your shell. 
+http://www.cdrom.com/pub/infozip/ ) to uncompress C<.zip> files; type +C<unzip yourmodule.zip> in your shell. Or, if you have a working C<tar> and C<gzip>, you can type @@ -151,7 +158,7 @@ UNPACK your module as well. B. UNPACK -All of the methods in DECOMPRESS will have done this for you. +The methods in DECOMPRESS will have done this for you. C. BUILD @@ -185,20 +192,19 @@ Specificly the "Commpress & Translate" listing ( http://hyperarchive.lcs.mit.edu/HyperArchive/Abstracts/cmp/HyperArchive.html ). -You can either use the shareware StuffIt Expander -( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/stuffit-expander-401.hqx ) +You can either use the shareware B<StuffIt Expander> program +( http://www.aladdinsys.com/expander/ ) in combination with I<DropStuff with Expander Enhancer> -( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/drop-stuff-with-ee-40.hqx ) -or the freeware MacGzip ( +( http://www.aladdinsys.com/dropstuff/ ) +or the freeware B<MacGzip> program ( http://persephone.cps.unizar.es/general/gente/spd/gzip/gzip.html ). B. UNPACK If you're using DropStuff or Stuffit, you can just extract the tar -archive. Otherwise, you can use the freeware I<suntar> -( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/suntar-221.hqx ) -or I<Tar> ( http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/tar-40b.hqx ). +archive. Otherwise, you can use the freeware B<suntar> or I<Tar> ( +http://hyperarchive.lcs.mit.edu/HyperArchive/Archive/cmp/ ). C. BUILD @@ -208,9 +214,9 @@ Does the module require compilation? Overview: You need MPW and a combination of new and old CodeWarrior compilers for MPW and libraries. Makefiles created for building under -MPW use the Metrowerks compilers. It's most likely possible to build +MPW use Metrowerks compilers. It's most likely possible to build without other compilers, but it has not been done successfully, to our -knowledge. Read the documentation in MacPerl: Power and Ease ( +knowledge. 
Read the documentation in I<MacPerl: Power and Ease> ( http://www.ptf.com/macperl/ ) on porting/building extensions, or find an existing precompiled binary, or hire someone to build it for you. @@ -226,9 +232,10 @@ Make sure the newlines for the modules are in Mac format, not Unix format. If they are not then you might have decompressed them incorrectly. Check your decompression and unpacking utilities settings to make sure they are translating text files properly. -As a last resort, you can use the perl one-liner: - perl -i.bak -pe 's/(?:\015)?\012/\015/g' filenames +As a last resort, you can use the perl one-liner: + + perl -i.bak -pe 's/(?:\015)?\012/\015/g' <filenames> on the source files. @@ -275,7 +282,7 @@ Go into the newly-created directory and type: make make test -You will need the packages mentioned in C<Readme.dos> +You will need the packages mentioned in F<README.dos> in the Perl distribution. D. INSTALL @@ -283,8 +290,8 @@ in the Perl distribution. While still in that directory, type: make install - -You will need the packages mentioned in Readme.dos in the Perl distribution. + +You will need the packages mentioned in F<README.dos> in the Perl distribution. =item * @@ -298,8 +305,8 @@ the instructions for Unix. B<If you're on VMS,> -When downloading from CPAN, save your file with a C<.tgz> -extension instead of C<.tar.gz>. All other periods in the +When downloading from CPAN, save your file with a F<.tgz> +extension instead of F<.tar.gz>. All other periods in the filename should be replaced with underscores. For example, C<Your-Module-1.33.tar.gz> should be downloaded as C<Your-Module-1_33.tgz>. @@ -315,8 +322,8 @@ or, for zipped modules, type unzip Your-Module.zip Executables for gzip, zip, and VMStar ( Alphas: -http://www.openvms.digital.com/cd/000TOOLS/ALPHA/ and Vaxen: -http://www.openvms.digital.com/cd/000TOOLS/VAX/ ). +http://www.openvms.digital.com/freeware/000TOOLS/ALPHA/ and Vaxen: +http://www.openvms.digital.com/freeware/000TOOLS/VAX/ ). 
gzip and tar are also available at ftp://ftp.digital.com/pub/VMS. @@ -361,7 +368,7 @@ Substitute C<mmk> for C<mms> above if you're using MMK. B<If you're on MVS>, -Introduce the .tar.gz file into an HFS as binary; don't translate from +Introduce the F<.tar.gz> file into an HFS as binary; don't translate from ASCII to EBCDIC. A. DECOMPRESS diff --git a/contrib/perl5/pod/perlmodlib.pod b/contrib/perl5/pod/perlmodlib.pod index d6c6b32..b42a2d8 100644 --- a/contrib/perl5/pod/perlmodlib.pod +++ b/contrib/perl5/pod/perlmodlib.pod @@ -6,102 +6,141 @@ perlmodlib - constructing new Perl modules and finding existing ones =head1 THE PERL MODULE LIBRARY -A number of modules are included the Perl distribution. These are -described below, and all end in F<.pm>. You may also discover files in -the library directory that end in either F<.pl> or F<.ph>. These are old -libraries supplied so that old programs that use them still run. The -F<.pl> files will all eventually be converted into standard modules, and -the F<.ph> files made by B<h2ph> will probably end up as extension modules -made by B<h2xs>. (Some F<.ph> values may already be available through the -POSIX module.) The B<pl2pm> file in the distribution may help in your -conversion, but it's just a mechanical process and therefore far from -bulletproof. +Many modules are included in the Perl distribution. These are described +below, and all end in F<.pm>. You may discover compiled library +files (usually ending in F<.so>) or small pieces of modules to be +autoloaded (ending in F<.al>); these were automatically generated +by the installation process. You may also discover files in the +library directory that end in either F<.pl> or F<.ph>. These are +old libraries supplied so that old programs that use them still +run. The F<.pl> files will all eventually be converted into standard +modules, and the F<.ph> files made by B<h2ph> will probably end up +as extension modules made by B<h2xs>.
(Some F<.ph> values may +already be available through the POSIX, Errno, or Fcntl modules.) +The B<pl2pm> file in the distribution may help in your conversion, +but it's just a mechanical process and therefore far from bulletproof. =head2 Pragmatic Modules -They work somewhat like pragmas in that they tend to affect the compilation of -your program, and thus will usually work well only when used within a -C<use>, or C<no>. Most of these are lexically scoped, so an inner BLOCK -may countermand any of these by saying: +They work somewhat like compiler directives (pragmata) in that they +tend to affect the compilation of your program, and thus will usually +work well only when used within a C<use>, or C<no>. Most of these +are lexically scoped, so an inner BLOCK may countermand them +by saying: no integer; no strict 'refs'; + no warnings; which lasts until the end of that BLOCK. -Unlike the pragmas that effect the C<$^H> hints variable, the C<use -vars> and C<use subs> declarations are not BLOCK-scoped. They allow -you to predeclare a variables or subroutines within a particular -I<file> rather than just a block. Such declarations are effective -for the entire file for which they were declared. You cannot rescind -them with C<no vars> or C<no subs>. +Some pragmas are lexically scoped--typically those that affect the +C<$^H> hints variable. Others affect the current package instead, +like C<use vars> and C<use subs>, which allow you to predeclare +variables or subroutines within a particular I<file> rather than +just a block. Such declarations are effective for the entire file +for which they were declared. You cannot rescind them with C<no +vars> or C<no subs>. The following pragmas are defined (and have their own documentation). =over 12 -=item use autouse MODULE => qw(sub1 sub2 sub3) +=item attributes -Defers C<require MODULE> until someone calls one of the specified -subroutines (which must be exported by MODULE).
This pragma should be -used with caution, and only when necessary. +Get/set subroutine or variable attributes + +=item attrs + +Set/get attributes of a subroutine (deprecated) + +=item autouse + +Postpone load of modules until a function is used + +=item base + +Establish IS-A relationship with base class at compile time =item blib -manipulate @INC at compile time to use MakeMaker's uninstalled version -of a package +Use MakeMaker's uninstalled version of a package + +=item caller + +Inherit pragmatic attributes from caller's context + +=item charnames + +Define character names for C<\N{named}> string literal escape. + +=item constant + +Declare constants =item diagnostics -force verbose warning diagnostics +Force verbose warning diagnostics + +=item fields + +Declare a class's attribute fields at compile-time + +=item filetest + +Control the filetest operators like C<-r>, C<-w> for AFS, etc. =item integer -compute arithmetic in integer instead of double +Compute arithmetic in integer instead of double =item less -request less of something from the compiler +Request less of something from the compiler (unimplemented) =item lib -manipulate @INC at compile time +Manipulate @INC at compile time =item locale -use or ignore current locale for builtin operations (see L<perllocale>) +Use or avoid POSIX locales for built-in operations =item ops -restrict named opcodes when compiling or running Perl code +Restrict unsafe operations when compiling =item overload -overload basic Perl operations +Overload Perl operations =item re -alter behaviour of regular expressions +Alter regular expression behavior =item sigtrap -enable simple signal handling +Enable simple signal handling =item strict -restrict unsafe constructs +Restrict unsafe constructs =item subs -predeclare sub names +Predeclare subroutine names -=item vmsish +=item utf8 -adopt certain VMS-specific behaviors +Turn on UTF-8 and Unicode support =item vars -predeclare global variable names +Predeclare global variable 
names (obsoleted by our()) + +=item warnings + +Control optional warnings =back @@ -115,211 +154,399 @@ Exporter module. See their own documentation for details. =item AnyDBM_File -provide framework for multiple DBMs +Provide framework for multiple DBM libraries =item AutoLoader -load functions only on demand +Load subroutines only on demand =item AutoSplit -split a package for autoloading +Split a package for autoloading + +=item B + +Guts of the Perl code generator (aka compiler) + +=item B::Asmdata + +Autogenerated data about Perl ops, used to generate bytecode + +=item B::Assembler + +Assemble Perl bytecode + +=item B::Bblock + +Walk basic blocks + +=item B::Bytecode + +Perl compiler's bytecode backend + +=item B::C + +Perl compiler's C backend + +=item B::CC + +Perl compiler's optimized C translation backend + +=item B::Debug + +Walk Perl syntax tree, printing debug info about ops + +=item B::Deparse + +Perl compiler backend to produce Perl code + +=item B::Disassembler + +Disassemble Perl bytecode + +=item B::Lint + +Module to catch dubious constructs + +=item B::Showlex + +Show lexical variables used in functions or files + +=item B::Stackobj + +Helper module for CC backend + +B::Stash -- XXX NFI XXX + +=item B::Terse + +Walk Perl syntax tree, printing terse info about ops + +=item B::Xref + +Generates cross reference reports for Perl programs =item Benchmark -benchmark running times of code +Benchmark running times of code + +=item ByteLoader + +Load byte-compiled Perl code + +=item CGI + +Simple Common Gateway Interface class + +=item CGI::Apache + +Make things work with CGI.pm against Perl-Apache API + +=item CGI::Carp + +CGI routines for writing to the HTTPD (or other) error log + +=item CGI::Cookie + +Interface to Netscape Cookies + +=item CGI::Fast + +CGI Interface for Fast CGI + +=item CGI::Pretty + +Module to produce nicely formatted HTML code + +=item CGI::Push + +Simple Interface to Server Push + +=item CGI::Switch + +Try more than one constructors 
and return the first object available =item CPAN -interface to Comprehensive Perl Archive Network +Query, download, and build Perl modules from CPAN sites =item CPAN::FirstTime -create a CPAN configuration file +Utility for CPAN::Config file initialization =item CPAN::Nox -run CPAN while avoiding compiled extensions +Wrapper around CPAN.pm without using any XS module =item Carp -warn of errors (from perspective of caller) +Act like warn/die from perspective of caller + +=item Carp::Heavy + +Carp guts =item Class::Struct -declare struct-like datatypes +Declare struct-like datatypes as Perl classes =item Config -access Perl configuration information +Access Perl configuration information =item Cwd -get pathname of current working directory +Get pathname of current working directory + +=item DB + +Programmatic interface to the Perl debugging API (experimental) =item DB_File -access to Berkeley DB +Perl5 access to Berkeley DB version 1.x + +=item Data::Dumper + +Serialize Perl data structures + +=item Devel::DProf + +A Perl execution profiler + +=item Devel::Peek + +A data debugging tool for the XS programmer =item Devel::SelfStubber -generate stubs for a SelfLoading module +Generate stubs for a SelfLoading module =item DirHandle -supply object methods for directory handles +Supply object methods for directory handles + +=item Dumpvalue + +Provide screen dump of Perl data =item DynaLoader -dynamically load C libraries into Perl code +Dynamically load C libraries into Perl code =item English -use nice English (or awk) names for ugly punctuation variables +Use English (or awk) names for ugly punctuation variables =item Env -import environment variables +Access environment variables as regular ones + +=item Errno + +Load the libc errno.h defines =item Exporter -implements default import method for modules +Implement default import method for modules + +=item Exporter::Heavy + +Exporter guts + +=item ExtUtils::Command + +Utilities to replace common Unix commands in 
Makefiles etc. =item ExtUtils::Embed -utilities for embedding Perl in C/C++ applications +Utilities for embedding Perl in C/C++ programs =item ExtUtils::Install -install files from here to there +Install files from here to there + +=item ExtUtils::Installed + +Inventory management of installed modules =item ExtUtils::Liblist -determine libraries to use and how to use them +Determine libraries to use and how to use them + +=item ExtUtils::MM_Cygwin + +Methods to override Unix behavior in ExtUtils::MakeMaker =item ExtUtils::MM_OS2 -methods to override Unix behaviour in ExtUtils::MakeMaker +Methods to override Unix behavior in ExtUtils::MakeMaker =item ExtUtils::MM_Unix -methods used by ExtUtils::MakeMaker +Methods used by ExtUtils::MakeMaker =item ExtUtils::MM_VMS -methods to override Unix behaviour in ExtUtils::MakeMaker +Methods to override Unix behavior in ExtUtils::MakeMaker + +=item ExtUtils::MM_Win32 + +Methods to override Unix behavior in ExtUtils::MakeMaker =item ExtUtils::MakeMaker -create an extension Makefile +Create an extension Makefile =item ExtUtils::Manifest -utilities to write and check a MANIFEST file +Utilities to write and check a MANIFEST file + +ExtUtils::Miniperl, writemain - Write the C code for perlmain.c =item ExtUtils::Mkbootstrap -make a bootstrap file for use by DynaLoader +Make a bootstrap file for use by DynaLoader =item ExtUtils::Mksymlists -write linker options files for dynamic extension +Write linker options files for dynamic extension + +=item ExtUtils::Packlist + +Manage .packlist files =item ExtUtils::testlib -add blib/* directories to @INC +Add blib/* directories to @INC =item Fatal -make errors in builtins or Perl functions fatal +Replace functions with equivalents which succeed or die =item Fcntl -load the C Fcntl.h defines +Load the libc fcntl.h defines =item File::Basename -split a pathname into pieces +Split a pathname into pieces =item File::CheckTree -run many filetest checks on a tree +Run many filetest checks on a tree 
=item File::Compare -compare files or filehandles +Compare files or filehandles =item File::Copy -copy files or filehandles +Copy files or filehandles + +=item File::DosGlob + +DOS-like globbing and then some =item File::Find -traverse a file tree +Traverse a file tree + +=item File::Glob + +Perl extension for BSD filename globbing =item File::Path -create or remove a series of directories +Create or remove a series of directories =item File::Spec -portably perform operations on file names +Portably perform operations on file names =item File::Spec::Functions -function call interface to File::Spec module +Portably perform operations on file names + +=item File::Spec::Mac + +File::Spec for MacOS + +=item File::Spec::OS2 + +Methods for OS/2 file specs + +=item File::Spec::Unix + +Methods used by File::Spec + +=item File::Spec::VMS + +Methods for VMS file specs + +=item File::Spec::Win32 + +Methods for Win32 file specs =item File::stat -by-name interface to Perl's builtin stat() functions +By-name interface to Perl's built-in stat() functions =item FileCache -keep more files open than the system permits +Keep more files open than the system permits =item FileHandle -supply object methods for filehandles +Supply object methods for filehandles =item FindBin -locate directory of original Perl script +Locate installation directory of running Perl program =item GDBM_File -access to the gdbm library +Access to the gdbm library =item Getopt::Long -extended processing of command line options +Extended processing of command line options =item Getopt::Std -process single-character switches with switch clustering +Process single-character switches with switch clustering =item I18N::Collate -compare 8-bit scalar data according to the current locale +Compare 8-bit scalar data according to current locale =item IO -load various IO modules +Front-end to load various IO modules + +=item IO::Dir + +Supply object methods for directory handles =item IO::File -supply object methods for 
filehandles +Supply object methods for filehandles =item IO::Handle -supply object methods for I/O handles +Supply object methods for I/O handles =item IO::Pipe -supply object methods for pipes +Supply object methods for pipes + +=item IO::Poll + +Object interface to system poll call =item IO::Seekable -supply seek based methods for I/O objects +Supply seek based methods for I/O objects =item IO::Select @@ -327,219 +554,297 @@ OO interface to the select system call =item IO::Socket -object interface to socket communications +Object interface to socket communications + +=item IO::Socket::INET + +Object interface for AF_INET domain sockets + +=item IO::Socket::UNIX + +Object interface for AF_UNIX domain sockets + +=item IPC::Msg + +SysV Msg IPC object class =item IPC::Open2 -open a process for both reading and writing +Open a process for both reading and writing =item IPC::Open3 -open a process for reading, writing, and error handling +Open a process for reading, writing, and error handling + +=item IPC::Semaphore + +SysV Semaphore IPC object class + +=item IPC::SysV + +SysV IPC constants =item Math::BigFloat -arbitrary length float math package +Arbitrary length float math package =item Math::BigInt -arbitrary size integer math package +Arbitrary size integer math package =item Math::Complex -complex numbers and associated mathematical functions +Complex numbers and associated mathematical functions =item Math::Trig -simple interface to parts of Math::Complex for those who -need trigonometric functions only for real numbers - -=item NDBM_File - -tied access to ndbm files +Trigonometric functions =item Net::Ping -Hello, anybody home? 
+Check a remote host for reachability =item Net::hostent -by-name interface to Perl's builtin gethost*() functions +By-name interface to Perl's built-in gethost*() functions =item Net::netent -by-name interface to Perl's builtin getnet*() functions +By-name interface to Perl's built-in getnet*() functions =item Net::protoent -by-name interface to Perl's builtin getproto*() functions +By-name interface to Perl's built-in getproto*() functions =item Net::servent -by-name interface to Perl's builtin getserv*() functions +By-name interface to Perl's built-in getserv*() functions + +=item O + +Generic interface to Perl Compiler backends =item Opcode -disable named opcodes when compiling or running Perl code +Disable named opcodes when compiling Perl code + +=item POSIX + +Perl interface to IEEE Std 1003.1 + +=item Pod::Checker + +Check pod documents for syntax errors + +=item Pod::Html + +Module to convert pod files to HTML + +=item Pod::InputObjects + +Manage POD objects + +=item Pod::Man + +Convert POD data to formatted *roff input + +=item Pod::Parser + +Base class for creating POD filters and translators + +=item Pod::Select + +Extract selected sections of POD from input =item Pod::Text -convert POD data to formatted ASCII text +Convert POD data to formatted ASCII text -=item POSIX +=item Pod::Text::Color + +Convert POD data to formatted color ASCII text + +=item Pod::Usage -interface to IEEE Standard 1003.1 +Print a usage message from embedded pod documentation =item SDBM_File -tied access to sdbm files +Tied access to sdbm files =item Safe -compile and execute code in restricted compartments +Compile and execute code in restricted compartments =item Search::Dict -search for key in dictionary file +Search for key in dictionary file =item SelectSaver -save and restore selected file handle +Save and restore selected file handle =item SelfLoader -load functions only on demand +Load functions only on demand =item Shell -run shell commands transparently within Perl +Run 
shell commands transparently within Perl =item Socket -load the C socket.h defines and structure manipulators +Load the libc socket.h defines and structure manipulators =item Symbol -manipulate Perl symbols and their names +Manipulate Perl symbols and their names =item Sys::Hostname -try every conceivable way to get hostname +Try every conceivable way to get hostname =item Sys::Syslog -interface to the Unix syslog(3) calls +Interface to the libc syslog(3) calls =item Term::Cap -termcap interface +Termcap interface =item Term::Complete -word completion module +Word completion module =item Term::ReadLine -interface to various C<readline> packages +Interface to various `readline' packages. + +=item Test + +Provides a simple framework for writing test scripts =item Test::Harness -run Perl standard test scripts with statistics +Run Perl standard test scripts with statistics =item Text::Abbrev -create an abbreviation table from a list +Create an abbreviation table from a list =item Text::ParseWords -parse text into an array of tokens +Parse text into a list of tokens or array of arrays =item Text::Soundex -implementation of the Soundex Algorithm as described by Knuth +Implementation of the Soundex Algorithm as described by Knuth -=item Text::Tabs - -expand and unexpand tabs per the Unix expand(1) and unexpand(1) +Text::Tabs -- expand and unexpand tabs per expand(1) and unexpand(1) =item Text::Wrap -line wrapping to form simple paragraphs +Line wrapping to form simple paragraphs + +=item Tie::Array + +Base class for tied arrays + +=item Tie::Handle + +Base class definitions for tied handles =item Tie::Hash -base class definitions for tied hashes +Base class definitions for tied hashes =item Tie::RefHash -base class definitions for tied hashes with references as keys +Use references as hash keys =item Tie::Scalar -base class definitions for tied scalars +Base class definitions for tied scalars =item Tie::SubstrHash -fixed-table-size, fixed-key-length hashing 
+Fixed-table-size, fixed-key-length hashing =item Time::Local -efficiently compute time from local and GMT time +Efficiently compute time from local and GMT time =item Time::gmtime -by-name interface to Perl's builtin gmtime() function +By-name interface to Perl's built-in gmtime() function =item Time::localtime -by-name interface to Perl's builtin localtime() function +By-name interface to Perl's built-in localtime() function =item Time::tm -internal object used by Time::gmtime and Time::localtime +Internal object used by Time::gmtime and Time::localtime =item UNIVERSAL -base class for ALL classes (blessed references) +Base class for ALL classes (blessed references) =item User::grent -by-name interface to Perl's builtin getgr*() functions +By-name interface to Perl's built-in getgr*() functions =item User::pwent -by-name interface to Perl's builtin getpw*() functions +By-name interface to Perl's built-in getpw*() functions =back -To find out I<all> the modules installed on your system, including -those without documentation or outside the standard release, do this: +To find out I<all> modules installed on your system, including +those without documentation or outside the standard release, +just do this: % find `perl -e 'print "@INC"'` -name '*.pm' -print -They should all have their own documentation installed and accessible via -your system man(1) command. If that fails, try the I<perldoc> program. +To get a log of all module distributions which have been installed +since perl was installed, just do: + + % perldoc perllocal + +Modules should all have their own documentation installed and accessible +via your system man(1) command, or via the C<perldoc> program. If you do +not have a B<find> +program, you can use the Perl B<find2perl> program instead, which +generates Perl code as output you can run through perl. If you +have a B<man> program but it doesn't find your modules, you'll have +to fix your manpath. See L<perl> for details. 
=head2 Extension Modules -Extension modules are written in C (or a mix of Perl and C) and may be -statically linked or in general are -dynamically loaded into Perl if and when you need them. Supported -extension modules include the Socket, Fcntl, and POSIX modules. +Extension modules are written in C (or a mix of Perl and C). They +are usually dynamically loaded into Perl if and when you need them, +but may also be linked in statically. Supported extension modules +include Socket, Fcntl, and POSIX. Many popular C extension modules do not come bundled (at least, not -completely) due to their sizes, volatility, or simply lack of time for -adequate testing and configuration across the multitude of platforms on -which Perl was beta-tested. You are encouraged to look for them in -archie(1L), the Perl FAQ or Meta-FAQ, the WWW page, and even with their -authors before randomly posting asking for their present condition and -disposition. +completely) due to their sizes, volatility, or simply lack of time +for adequate testing and configuration across the multitude of +platforms on which Perl was beta-tested. You are encouraged to +look for them on CPAN (described below), or using web search engines +like Alta Vista or Deja News. =head1 CPAN -CPAN stands for the Comprehensive Perl Archive Network. This is a globally -replicated collection of all known Perl materials, including hundreds -of unbundled modules. Here are the major categories of modules: +CPAN stands for Comprehensive Perl Archive Network; it's a globally +replicated trove of Perl materials, including documentation, style +guides, tricks and traps, alternate ports to non-Unix systems and +occasional binary distributions for these. Search engines for +CPAN can be found at http://cpan.perl.com/ and at +http://theory.uwinnipeg.ca/mod_perl/cpan-search.pl . + +Most importantly, CPAN includes around a thousand unbundled modules, +some of which require a C compiler to build. 
Major categories of +modules are: =over @@ -608,146 +913,175 @@ Miscellaneous Modules =back -The registered CPAN sites as of this writing include the following. +Registered CPAN sites as of this writing include the following. You should try to choose one close to you: =over -=item * -Africa +=item Africa - South Africa ftp://ftp.is.co.za/programming/perl/CPAN/ + South Africa ftp://ftp.is.co.za/programming/perl/CPAN/ + ftp://ftp.saix.net/pub/CPAN/ + ftp://ftp.sun.ac.za/CPAN/ ftp://ftpza.co.za/pub/mirrors/cpan/ -=item * -Asia - Armenia ftp://sunsite.aua.am/pub/CPAN/ - China ftp://freesoft.cei.gov.cn/pub/languages/perl/CPAN/ - Hong Kong ftp://ftp.hkstar.com/pub/CPAN/ - Israel ftp://bioinfo.weizmann.ac.il/pub/software/perl/CPAN/ - Japan ftp://ftp.dti.ad.jp/pub/lang/CPAN/ +=item Asia + + China ftp://freesoft.cei.gov.cn/pub/languages/perl/CPAN/ + Hong Kong ftp://ftp.pacific.net.hk/pub/mirror/CPAN/ + Indonesia ftp://malone.piksi.itb.ac.id/pub/CPAN/ + Israel ftp://bioinfo.weizmann.ac.il/pub/software/perl/CPAN/ + Japan ftp://ftp.dti.ad.jp/pub/lang/CPAN/ ftp://ftp.jaist.ac.jp/pub/lang/perl/CPAN/ ftp://ftp.lab.kdd.co.jp/lang/perl/CPAN/ ftp://ftp.meisei-u.ac.jp/pub/CPAN/ + ftp://ftp.ring.gr.jp/pub/lang/perl/CPAN/ ftp://mirror.nucba.ac.jp/mirror/Perl/ - Singapore ftp://ftp.nus.edu.sg/pub/unix/perl/CPAN/ - South Korea ftp://ftp.bora.net/pub/CPAN/ + Saudi-Arabia ftp://ftp.isu.net.sa/pub/CPAN/ + Singapore ftp://ftp.nus.edu.sg/pub/unix/perl/CPAN/ + South Korea ftp://ftp.bora.net/pub/CPAN/ + ftp://ftp.kornet.net/pub/CPAN/ ftp://ftp.nuri.net/pub/CPAN/ - Taiwan ftp://ftp.wownet.net/pub2/PERL/ + Taiwan ftp://coda.nctu.edu.tw/computer-languages/perl/CPAN/ + ftp://ftp.ee.ncku.edu.tw/pub3/perl/CPAN/ ftp://ftp1.sinica.edu.tw/pub1/perl/CPAN/ - Thailand ftp://ftp.cs.riubon.ac.th/pub/mirrors/CPAN/ - ftp://ftp.nectec.or.th/pub/mirrors/CPAN/ + Thailand ftp://ftp.nectec.or.th/pub/mirrors/CPAN/ -=item * -Australasia - Australia ftp://cpan.topend.com.au/pub/CPAN/ - 
ftp://ftp.labyrinth.net.au/pub/perl/CPAN/ +=item Australasia + + Australia ftp://cpan.topend.com.au/pub/CPAN/ + ftp://ftp.labyrinth.net.au/pub/perl-CPAN/ ftp://ftp.sage-au.org.au/pub/compilers/perl/CPAN/ ftp://mirror.aarnet.edu.au/pub/perl/CPAN/ - New Zealand ftp://ftp.auckland.ac.nz/pub/perl/CPAN/ + New Zealand ftp://ftp.auckland.ac.nz/pub/perl/CPAN/ ftp://sunsite.net.nz/pub/languages/perl/CPAN/ -=item * -Central America - Costa Rica ftp://ftp.ucr.ac.cr/pub/Unix/CPAN/ +=item Central America -=item * -Europe + Costa Rica ftp://ftp.ucr.ac.cr/pub/Unix/CPAN/ + + +=item Europe - Austria ftp://ftp.tuwien.ac.at/pub/languages/perl/CPAN/ - Belgium ftp://ftp.kulnet.kuleuven.ac.be/pub/mirror/CPAN/ - Bulgaria ftp://ftp.ntrl.net/pub/mirrors/CPAN/ - Croatia ftp://ftp.linux.hr/pub/CPAN/ - Czech Republic ftp://ftp.fi.muni.cz/pub/perl/ + Austria ftp://ftp.tuwien.ac.at/pub/languages/perl/CPAN/ + Belgium ftp://ftp.kulnet.kuleuven.ac.be/pub/mirror/CPAN/ + Bulgaria ftp://ftp.ntrl.net/pub/mirrors/CPAN/ + Croatia ftp://ftp.linux.hr/pub/CPAN/ + Czech Republic ftp://ftp.fi.muni.cz/pub/perl/ ftp://sunsite.mff.cuni.cz/Languages/Perl/CPAN/ - Denmark ftp://sunsite.auc.dk/pub/languages/perl/CPAN/ - Estonia ftp://ftp.ut.ee/pub/languages/perl/CPAN/ - Finland ftp://ftp.funet.fi/pub/languages/perl/CPAN/ - France ftp://ftp.lip6.fr/pub/perl/CPAN/ + Denmark ftp://sunsite.auc.dk/pub/languages/perl/CPAN/ + Estonia ftp://ftp.ut.ee/pub/languages/perl/CPAN/ + Finland ftp://ftp.funet.fi/pub/languages/perl/CPAN/ + France ftp://ftp.grolier.fr/pub/perl/CPAN/ + ftp://ftp.lip6.fr/pub/perl/CPAN/ ftp://ftp.oleane.net/pub/mirrors/CPAN/ ftp://ftp.pasteur.fr/pub/computing/CPAN/ - Germany ftp://ftp.archive.de.uu.net/pub/CPAN/ + ftp://ftp.uvsq.fr/pub/perl/CPAN/ + German ftp://ftp.gigabell.net/pub/CPAN/ + Germany ftp://ftp.archive.de.uu.net/pub/CPAN/ + ftp://ftp.freenet.de/pub/ftp.cpan.org/pub/ + ftp://ftp.gmd.de/packages/CPAN/ + ftp://ftp.gwdg.de/pub/languages/perl/CPAN/ + 
ftp://ftp.leo.org/pub/comp/general/programming/languages/script/perl/CPAN/ + ftp://ftp.mpi-sb.mpg.de/pub/perl/CPAN/ + ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN/ + ftp://ftp.uni-erlangen.de/pub/source/CPAN/ + ftp://ftp.uni-hamburg.de/pub/soft/lang/perl/CPAN/ + Germany ftp://ftp.archive.de.uu.net/pub/CPAN/ + ftp://ftp.freenet.de/pub/ftp.cpan.org/pub/ ftp://ftp.gmd.de/packages/CPAN/ ftp://ftp.gwdg.de/pub/languages/perl/CPAN/ - ftp://ftp.leo.org/pub/comp/programming/languages/script/perl/CPAN/ + ftp://ftp.leo.org/pub/comp/general/programming/languages/script/perl/CPAN/ ftp://ftp.mpi-sb.mpg.de/pub/perl/CPAN/ ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN/ ftp://ftp.uni-erlangen.de/pub/source/CPAN/ ftp://ftp.uni-hamburg.de/pub/soft/lang/perl/CPAN/ - Greece ftp://ftp.ntua.gr/pub/lang/perl/ - Hungary ftp://ftp.kfki.hu/pub/packages/perl/CPAN/ - Ireland ftp://sunsite.compapp.dcu.ie/pub/perl/ - Italy ftp://cis.uniRoma2.it/CPAN/ + Greece ftp://ftp.ntua.gr/pub/lang/perl/ + Hungary ftp://ftp.kfki.hu/pub/packages/perl/CPAN/ + Iceland ftp://ftp.gm.is/pub/CPAN/ + Ireland ftp://cpan.indigo.ie/pub/CPAN/ + ftp://sunsite.compapp.dcu.ie/pub/perl/ + Italy ftp://cis.uniRoma2.it/CPAN/ ftp://ftp.flashnet.it/pub/CPAN/ + ftp://ftp.unina.it/pub/Other/CPAN/ ftp://ftp.unipi.it/pub/mirror/perl/CPAN/ - Netherlands ftp://ftp.cs.uu.nl/mirror/CPAN/ + Netherlands ftp://ftp.cs.uu.nl/mirror/CPAN/ ftp://ftp.nluug.nl/pub/languages/perl/CPAN/ - Norway ftp://ftp.uit.no/pub/languages/perl/cpan/ + Norway ftp://ftp.uit.no/pub/languages/perl/cpan/ ftp://sunsite.uio.no/pub/languages/perl/CPAN/ - Poland ftp://ftp.man.szczecin.pl/pub/perl/CPAN/ - ftp://ftp.man.torun.pl/pub/doc/CPAN/ + Poland ftp://ftp.man.torun.pl/pub/CPAN/ ftp://ftp.pk.edu.pl/pub/lang/perl/CPAN/ ftp://sunsite.icm.edu.pl/pub/CPAN/ - Portugal ftp://ftp.ci.uminho.pt/pub/mirrors/cpan/ + Portugal ftp://ftp.ci.uminho.pt/pub/mirrors/cpan/ + ftp://ftp.ist.utl.pt/pub/CPAN/ ftp://ftp.ua.pt/pub/CPAN/ - Romania ftp://ftp.dntis.ro/pub/mirrors/perl-cpan/ - 
ftp://ftp.dnttm.ro/pub/CPAN/ - Russia ftp://cpan.npi.msu.su/CPAN/ + Romania ftp://ftp.dnttm.ro/pub/CPAN/ + Russia ftp://ftp.chg.ru/pub/lang/perl/CPAN/ ftp://ftp.sai.msu.su/pub/lang/perl/CPAN/ - Slovakia ftp://ftp.entry.sk/pub/languages/perl/CPAN/ - Slovenia ftp://ftp.arnes.si/software/perl/CPAN/ - Spain ftp://ftp.etse.urv.es/pub/perl/ + Slovakia ftp://ftp.entry.sk/pub/languages/perl/CPAN/ + Slovenia ftp://ftp.arnes.si/software/perl/CPAN/ + Spain ftp://ftp.etse.urv.es/pub/perl/ ftp://ftp.rediris.es/mirror/CPAN/ - Sweden ftp://ftp.sunet.se/pub/lang/perl/CPAN/ - Switzerland ftp://sunsite.cnlab-switch.ch/mirror/CPAN/ - Turkey ftp://sunsite.bilkent.edu.tr/pub/languages/CPAN/ - United Kingdom ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/ + Sweden ftp://ftp.sunet.se/pub/lang/perl/CPAN/ + Switzerland ftp://sunsite.cnlab-switch.ch/mirror/CPAN/ + Turkey ftp://sunsite.bilkent.edu.tr/pub/languages/CPAN/ + United Kingdom ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/ ftp://ftp.flirble.org/pub/languages/perl/CPAN/ + ftp://ftp.mirror.ac.uk/sites/ftp.funet.fi/pub/languages/perl/CPAN/ ftp://ftp.plig.org/pub/CPAN/ ftp://sunsite.doc.ic.ac.uk/packages/CPAN/ - ftp://unix.hensa.ac.uk/mirrors/perl-CPAN/ -=item * -North America - - Alberta ftp://sunsite.ualberta.ca/pub/Mirror/CPAN/ - California ftp://ftp.cdrom.com/pub/perl/CPAN/ - ftp://ftp.digital.com/pub/plan/perl/CPAN/ - Colorado ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ - Florida ftp://ftp.cise.ufl.edu/pub/perl/CPAN/ - Illinois ftp://uiarchive.uiuc.edu/pub/lang/perl/CPAN/ - Indiana ftp://csociety-ftp.ecn.purdue.edu/pub/CPAN/ + +=item North America + + Alberta ftp://sunsite.ualberta.ca/pub/Mirror/CPAN/ + California ftp://cpan.nas.nasa.gov/pub/perl/CPAN/ + ftp://cpan.valueclick.com/CPAN/ + ftp://ftp.cdrom.com/pub/perl/CPAN/ + http://download.sourceforge.net/mirrors/CPAN/ + Colorado ftp://ftp.cs.colorado.edu/pub/perl/CPAN/ + Florida ftp://ftp.cise.ufl.edu/pub/perl/CPAN/ + Georgia ftp://ftp.twoguys.org/CPAN/ + Illinois 
ftp://uiarchive.uiuc.edu/pub/lang/perl/CPAN/ + Indiana ftp://csociety-ftp.ecn.purdue.edu/pub/CPAN/ ftp://ftp.uwsg.indiana.edu/pub/perl/CPAN/ - Manitoba ftp://theory.uwinnipeg.ca/pub/CPAN/ - Massachusetts ftp://ftp.ccs.neu.edu/net/mirrors/ftp.funet.fi/pub/languages/perl/CPAN/ + Kentucky ftp://ftp.uky.edu/CPAN/ + Manitoba ftp://theoryx5.uwinnipeg.ca/pub/CPAN/ + Massachusetts ftp://ftp.ccs.neu.edu/net/mirrors/ftp.funet.fi/pub/languages/perl/CPAN/ ftp://ftp.iguide.com/pub/mirrors/packages/perl/CPAN/ - Mexico D.F. ftp://ftp.msg.com.mx/pub/CPAN/ - New York ftp://ftp.rge.com/pub/languages/perl/ - North Carolina ftp://ftp.duke.edu/pub/perl/ - Oklahoma ftp://ftp.ou.edu/mirrors/CPAN/ - Ontario ftp://ftp.crc.ca/pub/packages/perl/CPAN/ - Oregon ftp://ftp.orst.edu/pub/packages/CPAN/ - Pennsylvania ftp://ftp.epix.net/pub/languages/perl/ - Texas ftp://ftp.sedl.org/pub/mirrors/CPAN/ - Utah ftp://mirror.xmission.com/CPAN/ - Virginia ftp://ftp.perl.org/pub/perl/CPAN/ + Mexico ftp://ftp.msg.com.mx/pub/CPAN/ + New York ftp://ftp.deao.net/pub/CPAN/ + ftp://ftp.rge.com/pub/languages/perl/ + North Carolina ftp://ftp.duke.edu/pub/perl/ + Nova Scotia ftp://cpan.chebucto.ns.ca/pub/CPAN/ + Oklahoma ftp://ftp.ou.edu/mirrors/CPAN/ + Ontario ftp://ftp.crc.ca/pub/packages/lang/perl/CPAN/ + Oregon ftp://ftp.orst.edu/pub/packages/CPAN/ + Pennsylvania ftp://ftp.epix.net/pub/languages/perl/ + Tennessee ftp://ftp.sunsite.utk.edu/pub/CPAN/ + Texas ftp://ftp.sedl.org/pub/mirrors/CPAN/ + ftp://jhcloos.com/pub/mirror/CPAN/ + Utah ftp://mirror.xmission.com/CPAN/ + Virginia ftp://ftp.perl.org/pub/perl/CPAN/ ftp://ruff.cs.jmu.edu/pub/CPAN/ - Washington ftp://ftp.spu.edu/pub/CPAN/ + Washington ftp://ftp-mirror.internap.com/pub/CPAN/ + ftp://ftp.llarian.net/pub/CPAN/ + ftp://ftp.spu.edu/pub/CPAN/ -=item * -South America - Brazil ftp://cpan.if.usp.br/pub/mirror/CPAN/ - Chile ftp://ftp.ing.puc.cl/pub/unix/perl/CPAN/ - ftp://sunsite.dcc.uchile.cl/pub/Lang/perl/CPAN/ +=item South America + + Brazil 
ftp://cpan.if.usp.br/pub/mirror/CPAN/ + ftp://ftp.matrix.com.br/pub/perl/ + Chile ftp://sunsite.dcc.uchile.cl/pub/Lang/PERL/ =back For an up-to-date listing of CPAN sites, -see F<http://www.perl.com/perl/CPAN> or F<ftp://ftp.perl.com/perl/>. +see http://www.perl.com/perl/CPAN/SITES or ftp://www.perl.com/CPAN/SITES . =head1 Modules: Creation, Use, and Abuse @@ -791,6 +1125,10 @@ scheme as the original author. =item Try to design the new module to be easy to extend and reuse. +Try to C<use warnings;> (or C<use warnings qw(...);>). +Remember that you can add C<no warnings qw(...);> to individual blocks +of code that need less warnings. + Use blessed references. Use the two argument form of bless to bless into the class name given as the first parameter of the constructor, e.g.,: @@ -815,12 +1153,12 @@ appropriate. Split large methods into smaller more flexible ones. Inherit methods from other modules if appropriate. Avoid class name tests like: C<die "Invalid" unless ref $ref eq 'FOO'>. -Generally you can delete the "C<eq 'FOO'>" part with no harm at all. +Generally you can delete the C<eq 'FOO'> part with no harm at all. Let the objects look after themselves! Generally, avoid hard-wired class names as far as possible. -Avoid C<$r-E<gt>Class::func()> where using C<@ISA=qw(... Class ...)> and -C<$r-E<gt>func()> would work (see L<perlbot> for more details). +Avoid C<< $r->Class::func() >> where using C<@ISA=qw(... Class ...)> and +C<< $r->func() >> would work (see L<perlbot> for more details). Use autosplit so little used or newly added functions won't be a burden to programs that don't use them. Add test functions to @@ -829,7 +1167,7 @@ the module after __END__ either using AutoSplit or by saying: eval join('',<main::DATA>) || die $@ unless caller(); Does your module pass the 'empty subclass' test? 
If you say -"C<@SUBCLASS::ISA = qw(YOURCLASS);>" your applications should be able +C<@SUBCLASS::ISA = qw(YOURCLASS);> your applications should be able to use SUBCLASS in exactly the same way as YOURCLASS. For example, does your application still work if you change: C<$obj = new YOURCLASS;> into: C<$obj = new SUBCLASS;> ? @@ -838,11 +1176,18 @@ Avoid keeping any state information in your packages. It makes it difficult for multiple other packages to use yours. Keep state information in objects. -Always use B<-w>. Try to C<use strict;> (or C<use strict qw(...);>). +Always use B<-w>. + +Try to C<use strict;> (or C<use strict qw(...);>). Remember that you can add C<no strict qw(...);> to individual blocks -of code that need less strictness. Always use B<-w>. Always use B<-w>! +of code that need less strictness. + +Always use B<-w>. + Follow the guidelines in the perlstyle(1) manual. +Always use B<-w>. + =item Some simple style guidelines The perlstyle manual supplied with Perl has many helpful points. @@ -870,7 +1215,7 @@ or nature of a variable. For example: $no_caps_here function scope my() or local() variables Function and method names seem to work best as all lowercase. -e.g., C<$obj-E<gt>as_string()>. +e.g., C<< $obj->as_string() >>. You can use a leading underscore to indicate that a variable or function should not be used outside the package that defined it. @@ -886,7 +1231,7 @@ export try to use @EXPORT_OK in preference to @EXPORT and avoid short or common names to reduce the risk of name clashes. Generally anything not exported is still accessible from outside the -module using the ModuleName::item_name (or C<$blessed_ref-E<gt>method>) +module using the ModuleName::item_name (or C<< $blessed_ref->method >>) syntax. By convention you can use a leading underscore on names to indicate informally that they are 'internal' and not for public use. @@ -1012,7 +1357,7 @@ should store your module's version number in a non-my package variable called $VERSION. 
This should be a floating point number with at least two digits after the decimal (i.e., hundredths, e.g, C<$VERSION = "0.01">). Don't use a "1.3.2" style version. -See Exporter.pm in Perl5.001m or later for details. +See L<Exporter> for details. It may be handy to add a function or method to retrieve the number. Use the number in announcements and archive file names when @@ -1026,7 +1371,7 @@ module (or the module itself if small) to the comp.lang.perl.announce Usenet newsgroup. This will at least ensure very wide once-off distribution. -If possible you should place the module into a major ftp archive and +If possible, register the module with CPAN. You should include details of its location in your announcement. Some notes about ftp archives: Please use a long descriptive file @@ -1040,16 +1385,17 @@ location. FTP Archives for Perl Modules: -Follow the instructions and links on +Follow the instructions and links on: - http://franz.ww.tu-berlin.de/modulelist + http://www.perl.com/CPAN/modules/00modlist.long.html + http://www.perl.com/CPAN/modules/04pause.html or upload to one of these sites: - ftp://franz.ww.tu-berlin.de/incoming - ftp://ftp.cis.ufl.edu/incoming + https://pause.kbx.de/pause/ + http://pause.perl.org/pause/ -and notify <F<upload@franz.ww.tu-berlin.de>>. +and notify <modules@perl.org>. By using the WWW interface you can ask the Upload Server to mirror your modules from your ftp or WWW site into your own directory on @@ -1061,7 +1407,7 @@ Please remember to send me an updated entry for the Module list! Always strive to remain compatible with previous released versions. Otherwise try to add a mechanism to revert to the -old behaviour if people rely on it. Document incompatible changes. +old behavior if people rely on it. Document incompatible changes. =back @@ -1087,8 +1433,8 @@ it worth it unless you plan to make other changes at the same time? =item Make the most of the opportunity. 
If you are going to convert the script to a module you can use the -opportunity to redesign the interface. The 'Guidelines for Module -Creation' above include many of the issues you should consider. +opportunity to redesign the interface. The guidelines for module +creation above include many of the issues you should consider. =item The pl2pm utility will get you started. diff --git a/contrib/perl5/pod/perlnumber.pod b/contrib/perl5/pod/perlnumber.pod new file mode 100644 index 0000000..c83e0532 --- /dev/null +++ b/contrib/perl5/pod/perlnumber.pod @@ -0,0 +1,185 @@ +=head1 NAME + +perlnumber - semantics of numbers and numeric operations in Perl + +=head1 SYNOPSIS + + $n = 1234; # decimal integer + $n = 0b1110011; # binary integer + $n = 01234; # octal integer + $n = 0x1234; # hexadecimal integer + $n = 12.34e-56; # exponential notation + $n = "-12.34e56"; # number specified as a string + $n = "1234"; # number specified as a string + $n = v49.50.51.52; # number specified as a string, which in + # turn is specified in terms of numbers :-) + +=head1 DESCRIPTION + +This document describes how Perl internally handles numeric values. + +Perl's operator overloading facility is completely ignored here. Operator +overloading allows user-defined behaviors for numbers, such as operations +over arbitrarily large integers, floating points numbers with arbitrary +precision, operations over "exotic" numbers such as modular arithmetic or +p-adic arithmetic, and so on. See L<overload> for details. + +=head1 Storing numbers + +Perl can internally represent numbers in 3 different ways: as native +integers, as native floating point numbers, and as decimal strings. +Decimal strings may have an exponential notation part, as in C<"12.34e-56">. +I<Native> here means "a format supported by the C compiler which was used +to build perl". + +The term "native" does not mean quite as much when we talk about native +integers, as it does when native floating point numbers are involved. 
+The only implication of the term "native" on integers is that the limits for +the maximal and the minimal supported true integral quantities are close to +powers of 2. However, "native" floats have a most fundamental +restriction: they may represent only those numbers which have a relatively +"short" representation when converted to a binary fraction. For example, +0.9 cannot be represented by a native float, since the binary fraction +for 0.9 is infinite: + + binary0.1110011001100... + +with the sequence C<1100> repeating again and again. In addition to this +limitation, the exponent of the binary number is also restricted when it +is represented as a floating point number. On typical hardware, floating +point values can store numbers with up to 53 binary digits, and with binary +exponents between -1024 and 1024. In decimal representation this is close +to 16 decimal digits and decimal exponents in the range of -304..304. +The upshot of all this is that Perl cannot store a number like +12345678901234567 as a floating point number on such architectures without +loss of information. + +Similarly, decimal strings can represent only those numbers which have a +finite decimal expansion. Being strings, and thus of arbitrary length, there +is no practical limit for the exponent or number of decimal digits for these +numbers. (But realize that we are discussing the rules for just the +I<storage> of these numbers. The fact that you can store such "large" numbers +does not mean that the I<operations> over these numbers will use all +of the significant digits. +See L<"Numeric operators and numeric conversions"> for details.) + +In fact numbers stored in the native integer format may be stored either +in the signed native form, or in the unsigned native form. Thus the limits +for Perl numbers stored as native integers would typically be -2**31..2**32-1, +with appropriate modifications in the case of 64-bit integers. 
Again, this +does not mean that Perl can do operations only over integers in this range: +it is possible to store many more integers in floating point format. + +Summing up, Perl numeric values can store only those numbers which have +a finite decimal expansion or a "short" binary expansion. + +=head1 Numeric operators and numeric conversions + +As mentioned earlier, Perl can store a number in any one of three formats, +but most operators typically understand only one of those formats. When +a numeric value is passed as an argument to such an operator, it will be +converted to the format understood by the operator. + +Six such conversions are possible: + + native integer --> native floating point (*) + native integer --> decimal string + native floating_point --> native integer (*) + native floating_point --> decimal string (*) + decimal string --> native integer + decimal string --> native floating point (*) + +These conversions are governed by the following general rules: + +=over + +=item * + +If the source number can be represented in the target form, that +representation is used. + +=item * + +If the source number is outside of the limits representable in the target form, +a representation of the closest limit is used. (I<Loss of information>) + +=item * + +If the source number is between two numbers representable in the target form, +a representation of one of these numbers is used. (I<Loss of information>) + +=item * + +In C<< native floating point --> native integer >> conversions the magnitude +of the result is less than or equal to the magnitude of the source. +(I<"Rounding to zero".>) + +=item * + +If the C<< decimal string --> native integer >> conversion cannot be done +without loss of information, the result is compatible with the conversion +sequence C<< decimal_string --> native_floating_point --> native_integer >>. +In particular, rounding is strongly biased to 0, though a number like +C<"0.99999999999999999999"> has a chance of being rounded to 1. 
+ +=back + +B<RESTRICTION>: The conversions marked with C<(*)> above involve steps +performed by the C compiler. In particular, bugs/features of the compiler +used may lead to breakage of some of the above rules. + +=head1 Flavors of Perl numeric operations + +Perl operations which take a numeric argument treat that argument in one +of four different ways: they may force it to one of the integer/floating/ +string formats, or they may behave differently depending on the format of +the operand. Forcing a numeric value to a particular format does not +change the number stored in the value. + +All the operators which need an argument in the integer format treat the +argument as in modular arithmetic, e.g., C<mod 2**32> on a 32-bit +architecture. C<sprintf "%u", -1> therefore provides the same result as +C<sprintf "%u", ~0>. + +=over + +=item Arithmetic operators except, C<no integer> + +force the argument into the floating point format. + +=item Arithmetic operators except, C<use integer> + +=item Bitwise operators, C<no integer> + +force the argument into the integer format if it is not a string. + +=item Bitwise operators, C<use integer> + +force the argument into the integer format + +=item Operators which expect an integer + +force the argument into the integer format. This is applicable +to the third and fourth arguments of C<sysread>, for example. + +=item Operators which expect a string + +force the argument into the string format. For example, this is +applicable to C<printf "%s", $value>. + +=back + +Though forcing an argument into a particular form does not change the +stored number, Perl remembers the result of such conversions. In +particular, though the first such conversion may be time-consuming, +repeated operations will not need to redo the conversion. 
+ +=head1 AUTHOR + +Ilya Zakharevich C<ilya@math.ohio-state.edu> + +Editorial adjustments by Gurusamy Sarathy <gsar@ActiveState.com> + +=head1 SEE ALSO + +L<overload> diff --git a/contrib/perl5/pod/perlobj.pod b/contrib/perl5/pod/perlobj.pod index a997ae0..4e45aff 100644 --- a/contrib/perl5/pod/perlobj.pod +++ b/contrib/perl5/pod/perlobj.pod @@ -4,10 +4,10 @@ perlobj - Perl objects =head1 DESCRIPTION -First of all, you need to understand what references are in Perl. +First you need to understand what references are in Perl. See L<perlref> for that. Second, if you still find the following reference work too complicated, a tutorial on object-oriented programming -in Perl can be found in L<perltoot>. +in Perl can be found in L<perltoot> and L<perltootc>. If you're still with us, then here are three very simple definitions that you should find reassuring. @@ -50,7 +50,7 @@ a construct this way, too: package Critter; sub spawn { bless {} } -In fact, this might even be preferable, because the C++ programmers won't +This might even be preferable, because the C++ programmers won't be tricked into thinking that C<new> works in Perl as it does in C++. It doesn't. We recommend that you name your constructors whatever makes sense in the context of the problem you're solving. For example, @@ -73,7 +73,7 @@ have been returned directly, like this: return $self; } -In fact, you often see such a thing in more complicated constructors +You often see such a thing in more complicated constructors that wish to call methods in the class as part of the construction: sub new { @@ -96,8 +96,8 @@ so that your constructors may be inherited: return $self; } -Or if you expect people to call not just C<CLASS-E<gt>new()> but also -C<$obj-E<gt>new()>, then use something like this. The initialize() +Or if you expect people to call not just C<< CLASS->new() >> but also +C<< $obj->new() >>, then use something like this. 
The initialize() method used will be of whatever $class we blessed the object into: @@ -115,12 +115,13 @@ reference as an ordinary reference. Outside the class package, the reference is generally treated as an opaque value that may be accessed only through the class's methods. -A constructor may re-bless a referenced object currently belonging to -another class, but then the new class is responsible for all cleanup -later. The previous blessing is forgotten, as an object may belong -to only one class at a time. (Although of course it's free to -inherit methods from many classes.) If you find yourself having to -do this, the parent class is probably misbehaving, though. +Although a constructor can in theory re-bless a referenced object +currently belonging to another class, this is almost certainly going +to get you into trouble. The new class is responsible for all +cleanup later. The previous blessing is forgotten, as an object +may belong to only one class at a time. (Although of course it's +free to inherit methods from many classes.) If you find yourself +having to do this, the parent class is probably misbehaving, though. A clarification: Perl objects are blessed. References are not. Objects know which package they belong to. References do not. The bless() @@ -154,7 +155,7 @@ last base class. Several commonly used methods are automatically supplied in the UNIVERSAL class; see L<"Default UNIVERSAL methods"> for more details. -If a missing method is found in one of the base classes, it is cached +If a missing method is found in a base class, it is cached in the current class for efficiency. Changing @ISA or defining new subroutines invalidates the cache and causes Perl to do the lookup again. @@ -186,16 +187,16 @@ is to prepend your fieldname in the hash with the package name. Unlike say C++, Perl doesn't provide any special syntax for method definition. (It does provide a little syntax for method invocation though. More on that later.) 
A method expects its first argument -to be the object (reference) or package (string) it is being invoked on. There are just two -types of methods, which we'll call class and instance. -(Sometimes you'll hear these called static and virtual, in honor of -the two C++ method types they most closely resemble.) +to be the object (reference) or package (string) it is being invoked +on. There are two ways of calling methods, which we'll call class +methods and instance methods. A class method expects a class name as the first argument. It -provides functionality for the class as a whole, not for any individual -object belonging to the class. Constructors are typically class -methods. Many class methods simply ignore their first argument, because -they already know what package they're in, and don't care what package +provides functionality for the class as a whole, not for any +individual object belonging to the class. Constructors are often +class methods, but see L<perltoot> and L<perltootc> for alternatives. +Many class methods simply ignore their first argument, because they +already know what package they're in and don't care what package they were invoked via. (These aren't necessarily the same, because class methods follow the inheritance tree just like ordinary instance methods.) Another typical use for class methods is to look up an @@ -238,7 +239,7 @@ indirect object slot: display {find Critter "Fred"} 'Height', 'Weight'; -For C++ fans, there's also a syntax using -E<gt> notation that does exactly +For C++ fans, there's also a syntax using -> notation that does exactly the same thing. The parentheses are required if there are any arguments. $fred = Critter->find("Fred"); @@ -284,13 +285,13 @@ For more reasons why the indirect object syntax is ambiguous, see L<"WARNING"> below. There are times when you wish to specify which class's method to use. 
-In this case, you can call your method as an ordinary subroutine +Here you can call your method as an ordinary subroutine call, being sure to pass the requisite first argument explicitly: $fred = MyCritter::find("Critter", "Fred"); MyCritter::display($fred, 'Height', 'Weight'); -Note however, that this does not do any inheritance. If you wish +Unlike method calls, function calls don't consider inheritance. If you wish merely to specify that Perl should I<START> looking for a method in a particular package, use an ordinary method call, but qualify the method name with the package like this: @@ -310,10 +311,59 @@ class. Sometimes you want to call a method when you don't know the method name ahead of time. You can use the arrow form, replacing the method name -with a simple scalar variable containing the method name: +with a simple scalar variable containing the method name or a +reference to the function. $method = $fast ? "findfirst" : "findbest"; - $fred->$method(@args); + $fred->$method(@args); # call by name + + if ($coderef = $fred->can($parent . "::findbest")) { + $self->$coderef(@args); # call by coderef + } + +=head2 WARNING + +While indirect object syntax may well be appealing to English speakers and +to C++ programmers, be not seduced! It suffers from two grave problems. + +The first problem is that an indirect object is limited to a name, +a scalar variable, or a block, because it would have to do too much +lookahead otherwise, just like any other postfix dereference in the +language. (These are the same quirky rules as are used for the filehandle +slot in functions like C<print> and C<printf>.) This can lead to horribly +confusing precedence problems, as in these next two lines: + + move $obj->{FIELD}; # probably wrong! + move $ary[$i]; # probably wrong! + +Those actually parse as the very surprising: + + $obj->move->{FIELD}; # Well, lookee here + $ary->move([$i]); # Didn't expect this one, eh? 
+ +Rather than what you might have expected: + + $obj->{FIELD}->move(); # You should be so lucky. + $ary[$i]->move; # Yeah, sure. + +The left side of ``->'' is not so limited, because it's an infix operator, +not a postfix operator. + +As if that weren't bad enough, think about this: Perl must guess I<at +compile time> whether C<name> and C<move> above are functions or methods. +Usually Perl gets it right, but when it doesn't, you get a function +call compiled as a method, or vice versa. This can introduce subtle +bugs that are hard to unravel. For example, calling a method C<new> +in indirect notation--as C++ programmers are so wont to do--can +be miscompiled into a subroutine call if there's already a C<new> +function in scope. You'd end up calling the current package's C<new> +as a subroutine, rather than the desired class's method. The compiler +tries to cheat by remembering bareword C<require>s, but the grief if it +messes up just isn't worth the years of debugging it would likely take +you to track such subtle bugs down. + +The infix arrow notation using ``C<< -> >>'' doesn't suffer from either +of these disturbing ambiguities, so we recommend you use it exclusively. =head2 Default UNIVERSAL methods @@ -361,7 +411,7 @@ C<isa> uses a very similar method and cache-ing strategy. This may cause strange effects if the Perl code dynamically changes @ISA in any package. You may add other methods to the UNIVERSAL class via Perl or XS code. -You do not need to C<use UNIVERSAL> in order to make these methods +You do not need to C<use UNIVERSAL> to make these methods available to your program. This is necessary only if you wish to have C<isa> available as a plain subroutine in the current package. @@ -386,55 +436,11 @@ object destruction, or for ensuring that destructors in the base classes of your choosing get called. Explicitly calling DESTROY is also possible, but is usually never needed. 
-Do not confuse the foregoing with how objects I<CONTAINED> in the current +Do not confuse the previous discussion with how objects I<CONTAINED> in the current one are destroyed. Such objects will be freed and destroyed automatically when the current object is freed, provided no other references to them exist elsewhere. -=head2 WARNING - -While indirect object syntax may well be appealing to English speakers and -to C++ programmers, be not seduced! It suffers from two grave problems. - -The first problem is that an indirect object is limited to a name, -a scalar variable, or a block, because it would have to do too much -lookahead otherwise, just like any other postfix dereference in the -language. (These are the same quirky rules as are used for the filehandle -slot in functions like C<print> and C<printf>.) This can lead to horribly -confusing precedence problems, as in these next two lines: - - move $obj->{FIELD}; # probably wrong! - move $ary[$i]; # probably wrong! - -Those actually parse as the very surprising: - - $obj->move->{FIELD}; # Well, lookee here - $ary->move->[$i]; # Didn't expect this one, eh? - -Rather than what you might have expected: - - $obj->{FIELD}->move(); # You should be so lucky. - $ary[$i]->move; # Yeah, sure. - -The left side of ``-E<gt>'' is not so limited, because it's an infix operator, -not a postfix operator. - -As if that weren't bad enough, think about this: Perl must guess I<at -compile time> whether C<name> and C<move> above are functions or methods. -Usually Perl gets it right, but when it doesn't it, you get a function -call compiled as a method, or vice versa. This can introduce subtle -bugs that are hard to unravel. For example, calling a method C<new> -in indirect notation--as C++ programmers are so wont to do--can -be miscompiled into a subroutine call if there's already a C<new> -function in scope. You'd end up calling the current package's C<new> -as a subroutine, rather than the desired class's method. 
The compiler -tries to cheat by remembering bareword C<require>s, but the grief if it -messes up just isn't worth the years of debugging it would likely take -you to to track such subtle bugs down. - -The infix arrow notation using ``C<-E<gt>>'' doesn't suffer from either -of these disturbing ambiguities, so we recommend you use it exclusively. - =head2 Summary That's about all there is to it. Now you need just to go off and buy a @@ -443,8 +449,8 @@ with it for the next six months or so. =head2 Two-Phased Garbage Collection -For most purposes, Perl uses a fast and simple reference-based -garbage collection system. For this reason, there's an extra +For most purposes, Perl uses a fast and simple, reference-based +garbage collection system. That means there's an extra dereference going on at some level, so if you haven't built your Perl executable using your C compiler's C<-O> flag, performance will suffer. If you I<have> built Perl with C<cc -O>, then this @@ -529,8 +535,8 @@ When run as F</tmp/test>, the following output is produced: Notice that "global destruction" bit there? That's the thread garbage collector reaching the unreachable. -Objects are always destructed, even when regular refs aren't and in fact -are destructed in a separate pass before ordinary refs just to try to +Objects are always destructed, even when regular refs aren't. Objects +are destructed in a separate pass before ordinary refs just to prevent object destructors from using refs that have been themselves destructed. Plain refs are only garbage-collected if the destruct level is greater than 0. You can test the higher levels of global destruction @@ -547,8 +553,8 @@ breaks the circularities in the self-referential structure. =head1 SEE ALSO -A kinder, gentler tutorial on object-oriented programming in Perl can -be found in L<perltoot>. 
-You should also check out L<perlbot> for other object tricks, traps, and tips, -as well as L<perlmodlib> for some style guides on constructing both modules +A kinder, gentler tutorial on object-oriented programming in Perl +can be found in L<perltoot> and L<perltootc>. You should also check +out L<perlbot> for other object tricks, traps, and tips, as well +as L<perlmodlib> for some style guides on constructing both modules and classes. diff --git a/contrib/perl5/pod/perlop.pod b/contrib/perl5/pod/perlop.pod index 9f6d965..ce6fb66 100644 --- a/contrib/perl5/pod/perlop.pod +++ b/contrib/perl5/pod/perlop.pod @@ -5,11 +5,11 @@ perlop - Perl operators and precedence =head1 SYNOPSIS Perl operators have the following associativity and precedence, -listed from highest precedence to lowest. Note that all operators -borrowed from C keep the same precedence relationship with each other, -even where C's precedence is slightly screwy. (This makes learning -Perl easier for C folks.) With very few exceptions, these all -operate on scalar values only, not array values. +listed from highest precedence to lowest. Operators borrowed from +C keep the same precedence relationship with each other, even where +C's precedence is slightly screwy. (This makes learning Perl easier +for C folks.) With very few exceptions, these all operate on scalar +values only, not array values. left terms and list operators (leftward) left -> @@ -64,11 +64,11 @@ For example, in @ary = (1, 3, sort 4, 2); print @ary; # prints 1324 -the commas on the right of the sort are evaluated before the sort, but -the commas on the left are evaluated after. In other words, list -operators tend to gobble up all the arguments that follow them, and +the commas on the right of the sort are evaluated before the sort, +but the commas on the left are evaluated after. In other words, +list operators tend to gobble up all arguments that follow, and then act like a simple TERM with regard to the preceding expression. 
-Note that you have to be careful with parentheses: +Be careful with parentheses: # These evaluate exit before doing the print: print($foo, exit); # Obviously not what you want. @@ -95,16 +95,18 @@ as well as L<"I/O Operators">. =head2 The Arrow Operator -Just as in C and C++, "C<-E<gt>>" is an infix dereference operator. If the -right side is either a C<[...]> or C<{...}> subscript, then the left side -must be either a hard or symbolic reference to an array or hash (or -a location capable of holding a hard reference, if it's an lvalue (assignable)). -See L<perlref>. +"C<< -> >>" is an infix dereference operator, just as it is in C +and C++. If the right side is either a C<[...]>, C<{...}>, or a +C<(...)> subscript, then the left side must be either a hard or +symbolic reference to an array, a hash, or a subroutine respectively. +(Or technically speaking, a location capable of holding a hard +reference, if it's an array or hash reference being used for +assignment.) See L<perlreftut> and L<perlref>. -Otherwise, the right side is a method name or a simple scalar variable -containing the method name, and the left side must either be an object -(a blessed reference) or a class name (that is, a package name). -See L<perlobj>. +Otherwise, the right side is a method name or a simple scalar +variable containing either the method name or a subroutine reference, +and the left side must be either an object (a blessed reference) +or a class name (that is, a package name). See L<perlobj>. =head2 Auto-increment and Auto-decrement @@ -129,7 +131,7 @@ The auto-decrement operator is not magical. =head2 Exponentiation -Binary "**" is the exponentiation operator. Note that it binds even more +Binary "**" is the exponentiation operator. It binds even more tightly than unary minus, so -2**4 is -(2**4), not (-2)**4. (This is implemented using C's pow(3) function, which actually works on doubles internally.) 
@@ -146,19 +148,22 @@ starts with a plus or minus, a string starting with the opposite sign is returned. One effect of these rules is that C<-bareword> is equivalent to C<"-bareword">. -Unary "~" performs bitwise negation, i.e., 1's complement. For example, -C<0666 &~ 027> is 0640. (See also L<Integer Arithmetic> and L<Bitwise -String Operators>.) +Unary "~" performs bitwise negation, i.e., 1's complement. For +example, C<0666 & ~027> is 0640. (See also L<Integer Arithmetic> and +L<Bitwise String Operators>.) Note that the width of the result is +platform-dependent: ~0 is 32 bits wide on a 32-bit platform, but 64 +bits wide on a 64-bit platform, so if you are expecting a certain bit +width, remember to use the & operator to mask off the excess bits. Unary "+" has no effect whatsoever, even on strings. It is useful syntactically for separating a function name from a parenthesized expression that would otherwise be interpreted as the complete list of function arguments. (See examples above under L<Terms and List Operators (Leftward)>.) -Unary "\" creates a reference to whatever follows it. See L<perlref>. -Do not confuse this behavior with the behavior of backslash within a -string, although both forms do convey the notion of protecting the next -thing from interpretation. +Unary "\" creates a reference to whatever follows it. See L<perlreftut> +and L<perlref>. Do not confuse this behavior with the behavior of +backslash within a string, although both forms do convey the notion +of protecting the next thing from interpolation. =head2 Binding Operators @@ -167,10 +172,13 @@ search or modify the string $_ by default. This operator makes that kind of operation work on some other string. The right argument is a search pattern, substitution, or transliteration. The left argument is what is supposed to be searched, substituted, or transliterated instead of the default -$_. The return value indicates the success of the operation. 
(If the -right argument is an expression rather than a search pattern, +$_. When used in scalar context, the return value generally indicates the +success of the operation. Behavior in list context depends on the particular +operator. See L</"Regexp Quote-Like Operators"> for details. + +If the right argument is an expression rather than a search pattern, substitution, or transliteration, it is interpreted as a search pattern at run -time. This can be is less efficient than an explicit search, because the +time. This can be less efficient than an explicit search, because the pattern must be compiled every time the expression is evaluated. Binary "!~" is just like "=~" except the return value is negated in @@ -193,10 +201,11 @@ to the modulus operator as implemented by your C compiler. This operator is not as well defined for negative operands, but it will execute faster. -Binary "x" is the repetition operator. In scalar context, it -returns a string consisting of the left operand repeated the number of -times specified by the right operand. In list context, if the left -operand is a list in parentheses, it repeats the list. +Binary "x" is the repetition operator. In scalar context or if the left +operand is not enclosed in parentheses, it returns a string consisting +of the left operand repeated the number of times specified by the right +operand. In list context, if the left operand is enclosed in +parentheses, it repeats the list. print '-' x 80; # print row of dashes @@ -256,16 +265,16 @@ See also L<"Terms and List Operators (Leftward)">. =head2 Relational Operators -Binary "E<lt>" returns true if the left argument is numerically less than +Binary "<" returns true if the left argument is numerically less than the right argument. -Binary "E<gt>" returns true if the left argument is numerically greater +Binary ">" returns true if the left argument is numerically greater than the right argument. 
-Binary "E<lt>=" returns true if the left argument is numerically less than +Binary "<=" returns true if the left argument is numerically less than or equal to the right argument. -Binary "E<gt>=" returns true if the left argument is numerically greater +Binary ">=" returns true if the left argument is numerically greater than or equal to the right argument. Binary "lt" returns true if the left argument is stringwise less than @@ -288,7 +297,7 @@ the right argument. Binary "!=" returns true if the left argument is numerically not equal to the right argument. -Binary "E<lt>=E<gt>" returns -1, 0, or 1 depending on whether the left +Binary "<=>" returns -1, 0, or 1 depending on whether the left argument is numerically less than, equal to, or greater than the right argument. @@ -359,18 +368,20 @@ With the C-style operators that would have been written like this: unlink("alpha", "beta", "gamma") || (gripe(), next LINE); -Use "or" for assignment is unlikely to do what you want; see below. +Using "or" for assignment is unlikely to do what you want; see below. =head2 Range Operators Binary ".." is the range operator, which is really two different operators depending on the context. In list context, it returns an -array of values counting (by ones) from the left value to the right -value. This is useful for writing C<foreach (1..10)> loops and for -doing slice operations on arrays. In the current implementation, no -temporary array is created when the range operator is used as the -expression in C<foreach> loops, but older versions of Perl might burn -a lot of memory when you write something like this: +array of values counting (up by ones) from the left value to the right +value. If the left value is greater than the right value then it +returns the empty array. The range operator is useful for writing +C<foreach (1..10)> loops and for doing slice operations on arrays. 
In +the current implementation, no temporary array is created when the +range operator is used as the expression in C<foreach> loops, but older +versions of Perl might burn a lot of memory when you write something +like this: for (1 .. 1_000_000) { # code @@ -382,23 +393,26 @@ of B<sed>, B<awk>, and various editors. Each ".." operator maintains its own boolean state. It is false as long as its left operand is false. Once the left operand is true, the range operator stays true until the right operand is true, I<AFTER> which the range operator becomes false -again. (It doesn't become false till the next time the range operator is +again. It doesn't become false till the next time the range operator is evaluated. It can test the right operand and become false on the same evaluation it became true (as in B<awk>), but it still returns true once. -If you don't want it to test the right operand till the next evaluation -(as in B<sed>), use three dots ("...") instead of two.) The right -operand is not evaluated while the operator is in the "false" state, and -the left operand is not evaluated while the operator is in the "true" -state. The precedence is a little lower than || and &&. The value -returned is either the empty string for false, or a sequence number -(beginning with 1) for true. The sequence number is reset for each range -encountered. The final sequence number in a range has the string "E0" -appended to it, which doesn't affect its numeric value, but gives you -something to search for if you want to exclude the endpoint. You can -exclude the beginning point by waiting for the sequence number to be -greater than 1. If either operand of scalar ".." is a constant expression, -that operand is implicitly compared to the C<$.> variable, the current -line number. Examples: +If you don't want it to test the right operand till the next +evaluation, as in B<sed>, just use three dots ("...") instead of +two. In all other regards, "..." behaves just like ".." does. 
+ +The right operand is not evaluated while the operator is in the +"false" state, and the left operand is not evaluated while the +operator is in the "true" state. The precedence is a little lower +than || and &&. The value returned is either the empty string for +false, or a sequence number (beginning with 1) for true. The +sequence number is reset for each range encountered. The final +sequence number in a range has the string "E0" appended to it, which +doesn't affect its numeric value, but gives you something to search +for if you want to exclude the endpoint. You can exclude the +beginning point by waiting for the sequence number to be greater +than 1. If either operand of scalar ".." is a constant expression, +that operand is implicitly compared to the C<$.> variable, the +current line number. Examples: As a scalar operator: @@ -427,7 +441,7 @@ can say @alphabet = ('A' .. 'Z'); -to get all the letters of the alphabet, or +to get all normal letters of the alphabet, or $hexdigit = (0 .. 9, 'a' .. 'f')[$num & 15]; @@ -462,8 +476,6 @@ legal lvalues (meaning that you can assign to them): ($a_or_b ? $a : $b) = $c; -This is not necessarily guaranteed to contribute to the readability of your program. - Because this operator produces an assignable result, using assignments without parentheses will get you in trouble. For example, this: @@ -477,6 +489,10 @@ Rather than this: ($a % 2) ? ($a += 10) : ($a += 2) +That should probably be written more simply as: + + $a += ($a % 2) ? 10 : 2; + =head2 Assignment Operators "=" is the ordinary assignment operator. @@ -498,13 +514,13 @@ The following are recognized: .= %= ^= x= -Note that while these are grouped by family, they all have the precedence +Although these are grouped by family, they all have the precedence of assignment. -Unlike in C, the assignment operator produces a valid lvalue. Modifying -an assignment is equivalent to doing the assignment and then modifying -the variable that was assigned to. 
This is useful for modifying -a copy of something, like this: +Unlike in C, the scalar assignment operator produces a valid lvalue. +Modifying an assignment is equivalent to doing the assignment and +then modifying the variable that was assigned to. This is useful +for modifying a copy of something, like this: ($tmp = $global) =~ tr [A-Z] [a-z]; @@ -517,6 +533,11 @@ is equivalent to $a += 2; $a *= 3; +Similarly, a list assignment in list context produces the list of +lvalues assigned to, and a list assignment in scalar context returns +the number of elements produced by the expression on the right hand +side of the assignment. + =head2 Comma Operator Binary "," is the comma operator. In scalar context it evaluates @@ -526,7 +547,7 @@ argument and returns that value. This is just like C's comma operator. In list context, it's just the list argument separator, and inserts both its arguments into the list. -The =E<gt> digraph is mostly just a synonym for the comma operator. It's useful for +The => digraph is mostly just a synonym for the comma operator. It's useful for documenting arguments that come in pairs. As of release 5.001, it also forces any word to the left of it to be interpreted as a string. @@ -571,14 +592,14 @@ probably avoid using this for assignment, only for control flow. ($a = $b) or $c; # really means this $a = $b || $c; # better written this way -However, when it's a list context assignment and you're trying to use +However, when it's a list-context assignment and you're trying to use "||" for control flow, you probably need "or" so that the assignment takes higher precedence. @info = stat($file) || die; # oops, scalar sense of stat! @info = stat($file) or die; # better, now @info gets its due -Then again, you could always use parentheses. +Then again, you could always use parentheses. Binary "xor" returns the exclusive-OR of the two surrounding expressions. It cannot short circuit, of course. 
@@ -600,7 +621,7 @@ operators are typed: $, @, %, and &.) =item (TYPE) -Type casting operator. +Type-casting operator. =back @@ -611,9 +632,7 @@ function as operators, providing various kinds of interpolating and pattern matching capabilities. Perl provides customary quote characters for these behaviors, but also provides a way for you to choose your quote character for any of them. In the following table, a C<{}> represents -any pair of delimiters you choose. Non-bracketing delimiters use -the same character fore and aft, but the 4 sorts of brackets -(round, angle, square, curly) will all nest. +any pair of delimiters you choose. Customary Generic Meaning Interpolates '' q{} Literal no @@ -625,18 +644,35 @@ the same character fore and aft, but the 4 sorts of brackets s{}{} Substitution yes (unless '' is delimiter) tr{}{} Transliteration no (but see below) -Note that there can be whitespace between the operator and the quoting +Non-bracketing delimiters use the same character fore and aft, but the four +sorts of brackets (round, angle, square, curly) will all nest, which means +that + + q{foo{bar}baz} + +is the same as + + 'foo{bar}baz' + +Note, however, that this does not always work for quoting Perl code: + + $s = q{ if($a eq "}") ... }; # WRONG + +is a syntax error. The C<Text::Balanced> module on CPAN is able to do this +properly. + +There can be whitespace between the operator and the quoting characters, except when C<#> is being used as the quoting character. -C<q#foo#> is parsed as being the string C<foo>, while C<q #foo#> is the -operator C<q> followed by a comment. Its argument will be taken from the -next line. This allows you to write: +C<q#foo#> is parsed as the string C<foo>, while C<q #foo#> is the +operator C<q> followed by a comment. Its argument will be taken +from the next line. This allows you to write: s {foo} # Replace foo {bar} # with bar. 
-For constructs that do interpolation, variables beginning with "C<$>" -or "C<@>" are interpolated, as are the following sequences. Within -a transliteration, the first ten of these sequences may be used. +For constructs that do interpolate, variables beginning with "C<$>" +or "C<@>" are interpolated, as are the following escape sequences. Within +a transliteration, the first eleven of these sequences may be used. \t tab (HT, TAB) \n newline (NL) @@ -647,7 +683,9 @@ a transliteration, the first ten of these sequences may be used. \e escape (ESC) \033 octal char (ESC) \x1b hex char (ESC) - \c[ control char + \x{263a} wide hex char (SMILEY) + \c[ control char (ESC) + \N{name} named char \l lowercase next char \u uppercase next char @@ -657,11 +695,12 @@ a transliteration, the first ten of these sequences may be used. \Q quote non-word characters till \E If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> -and C<\U> is taken from the current locale. See L<perllocale>. +and C<\U> is taken from the current locale. See L<perllocale>. For +documentation of C<\N{name}>, see L<charnames>. All systems use the virtual C<"\n"> to represent a line terminator, called a "newline". There is no such thing as an unvarying, physical -newline character. It is an illusion that the operating system, +newline character. It is only an illusion that the operating system, device drivers, C libraries, and Perl all conspire to preserve. Not all systems read C<"\r"> as ASCII CR and C<"\n"> as ASCII LF. For example, on a Mac, these are reversed, and on systems without line terminator, @@ -684,28 +723,17 @@ interpolated, so that regular expressions may be incorporated into the pattern from the variables. If this is not what you want, use C<\Q> to interpolate a variable literally. -Apart from the above, there are no multiple levels of interpolation. 
In -particular, contrary to the expectations of shell programmers, back-quotes -do I<NOT> interpolate within double quotes, nor do single quotes impede -evaluation of variables when used within double quotes. +Apart from the behavior described above, Perl does not expand +multiple levels of interpolation. In particular, contrary to the +expectations of shell programmers, back-quotes do I<NOT> interpolate +within double quotes, nor do single quotes impede evaluation of +variables when used within double quotes. =head2 Regexp Quote-Like Operators Here are the quote-like operators that apply to pattern matching and related activities. -Most of this section is related to use of regular expressions from Perl. -Such a use may be considered from two points of view: Perl handles a -a string and a "pattern" to RE (regular expression) engine to match, -RE engine finds (or does not find) the match, and Perl uses the findings -of RE engine for its operation, possibly asking the engine for other matches. - -RE engine has no idea what Perl is going to do with what it finds, -similarly, the rest of Perl has no idea what a particular regular expression -means to RE engine. This creates a clean separation, and in this section -we discuss matching from Perl point of view only. The other point of -view may be found in L<perlre>. - =over 8 =item ?PATTERN? @@ -724,21 +752,22 @@ patterns local to the current package are reset. reset if eof; # clear ?? status for next file } -This usage is vaguely deprecated, and may be removed in some future -version of Perl. +This usage is vaguely depreciated, which means it just might possibly +be removed in some distant future version of Perl, perhaps somewhere +around the year 2168. =item m/PATTERN/cgimosx =item /PATTERN/cgimosx Searches a string for a pattern match, and in scalar context returns -true (1) or false (''). If no string is specified via the C<=~> or -C<!~> operator, the $_ string is searched. 
(The string specified with -C<=~> need not be an lvalue--it may be the result of an expression -evaluation, but remember the C<=~> binds rather tightly.) See also -L<perlre>. -See L<perllocale> for discussion of additional considerations that apply -when C<use locale> is in effect. +true if it succeeds, false if it fails. If no string is specified +via the C<=~> or C<!~> operator, the $_ string is searched. (The +string specified with C<=~> need not be an lvalue--it may be the +result of an expression evaluation, but remember the C<=~> binds +rather tightly.) See also L<perlre>. See L<perllocale> for +discussion of additional considerations that apply when C<use locale> +is in effect. Options are: @@ -752,11 +781,10 @@ Options are: If "/" is the delimiter then the initial C<m> is optional. With the C<m> you can use any pair of non-alphanumeric, non-whitespace characters -as delimiters. This is particularly useful for matching Unix path names -that contain "/", to avoid LTS (leaning toothpick syndrome). If "?" is +as delimiters. This is particularly useful for matching path names +that contain "/", to avoid LTS (leaning toothpick syndrome). If "?" is the delimiter, then the match-only-once rule of C<?PATTERN?> applies. -If "'" is the delimiter, no variable interpolation is performed on the -PATTERN. +If "'" is the delimiter, no interpolation is performed on the PATTERN. PATTERN may contain variables, which will be interpolated (and the pattern recompiled) every time the pattern search is evaluated, except @@ -767,12 +795,12 @@ the trailing delimiter. This avoids expensive run-time recompilations, and is useful when the value you are interpolating won't change over the life of the script. However, mentioning C</o> constitutes a promise that you won't change the variables in the pattern. If you change them, -Perl won't even notice. +Perl won't even notice. See also L<"qr//">. 
If the PATTERN evaluates to the empty string, the last I<successfully> matched regular expression is used instead. -If the C</g> option is not used, C<m//> in a list context returns a +If the C</g> option is not used, C<m//> in list context returns a list consisting of the subexpressions matched by the parentheses in the pattern, i.e., (C<$1>, C<$2>, C<$3>...). (Note that here C<$1> etc. are also set, and that this differs from Perl 4's behavior.) When there are @@ -802,15 +830,16 @@ remainder of the line, and assigns those three fields to $F1, $F2, and $Etc. The conditional is true if any variables were assigned, i.e., if the pattern matched. -The C</g> modifier specifies global pattern matching--that is, matching -as many times as possible within the string. How it behaves depends on -the context. In list context, it returns a list of all the -substrings matched by all the parentheses in the regular expression. -If there are no parentheses, it returns a list of all the matched -strings, as if there were parentheses around the whole pattern. +The C</g> modifier specifies global pattern matching--that is, +matching as many times as possible within the string. How it behaves +depends on the context. In list context, it returns a list of the +substrings matched by any capturing parentheses in the regular +expression. If there are no parentheses, it returns a list of all +the matched strings, as if there were parentheses around the whole +pattern. In scalar context, each execution of C<m//g> finds the next match, -returning TRUE if it matches, and FALSE if there is no further match. +returning true if it matches, and false if there is no further match. The position after the last match can be read or set using the pos() function; see L<perlfunc/pos>. A failed match normally resets the search position to the beginning of the string, but you can avoid that @@ -820,8 +849,8 @@ string also resets the search position. 
You can intermix C<m//g> matches with C<m/\G.../g>, where C<\G> is a zero-width assertion that matches the exact position where the previous C<m//g>, if any, left off. The C<\G> assertion is not supported without -the C</g> modifier; currently, without C</g>, C<\G> behaves just like -C<\A>, but that's accidental and may change in the future. +the C</g> modifier. (Currently, without C</g>, C<\G> behaves just like +C<\A>, but that's accidental and may change in the future.) Examples: @@ -829,12 +858,10 @@ Examples: ($one,$five,$fifteen) = (`uptime` =~ /(\d+\.\d+)/g); # scalar context - { - local $/ = ""; - while (defined($paragraph = <>)) { - while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\s/g) { - $sentences++; - } + $/ = ""; $* = 1; # $* deprecated in modern perls + while (defined($paragraph = <>)) { + while ($paragraph =~ /[a-z]['")]*[.!?]+['")]*\s/g) { + $sentences++; } } print "$sentences\n"; @@ -890,7 +917,7 @@ Here is the output (split into several lines): =item C<'STRING'> -A single-quoted, literal string. A backslash represents a backslash +A single-quoted, literal string. A backslash represents a backslash unless followed by the delimiter or another backslash, in which case the delimiter or backslash is interpolated. @@ -906,15 +933,16 @@ A double-quoted, interpolated string. $_ .= qq (*** The previous line contains the naughty word "$1".\n) - if /(tcl|rexx|python)/; # :-) + if /\b(tcl|java|python)\b/i; # :-) $baz = "\n"; # a one-character string -=item qr/PATTERN/imosx +=item qr/STRING/imosx -Quote-as-a-regular-expression operator. I<STRING> is interpolated the -same way as I<PATTERN> in C<m/PATTERN/>. If "'" is used as the -delimiter, no variable interpolation is done. Returns a Perl value -which may be used instead of the corresponding C</STRING/imosx> expression. +This operator quotes--and compiles--its I<STRING> as a regular +expression. I<STRING> is interpolated the same way as I<PATTERN> +in C<m/PATTERN/>.
If "'" is used as the delimiter, no interpolation +is done. Returns a Perl value which may be used instead of the +corresponding C</STRING/imosx> expression. For example, @@ -933,7 +961,7 @@ The result may be used as a subpattern in a match: $string =~ /$re/; # or this way Since Perl may compile the pattern at the moment of execution of qr() -operator, using qr() may have speed advantages in I<some> situations, +operator, using qr() may have speed advantages in some situations, notably if the result of qr() is used standalone: sub match { @@ -941,18 +969,18 @@ notably if the result of qr() is used standalone: my @compiled = map qr/$_/i, @$patterns; grep { my $success = 0; - foreach my $pat @compiled { + foreach my $pat (@compiled) { $success = 1, last if /$pat/; } $success; } @_; } -Precompilation of the pattern into an internal representation at the -moment of qr() avoids a need to recompile the pattern every time a -match C</$pat/> is attempted. (Note that Perl has many other -internal optimizations, but none would be triggered in the above -example if we did not use qr() operator.) +Precompilation of the pattern into an internal representation at +the moment of qr() avoids a need to recompile the pattern every +time a match C</$pat/> is attempted. (Perl has many other internal +optimizations, but none would be triggered in the above example if +we did not use qr() operator.) Options are: @@ -1009,7 +1037,7 @@ double-quote interpolation, passing it on to the shell instead: $perl_info = qx(ps $$); # that's Perl's $$ $shell_info = qx'ps $$'; # that's the new shell's $$ -Note that how the string gets evaluated is entirely subject to the command +How that string gets evaluated is entirely subject to the command interpreter on your system. On most platforms, you will have to protect shell metacharacters if you want them treated literally. This is in practice difficult to do, as it's unclear how to escape which characters. 
@@ -1023,6 +1051,12 @@ multiple commands in a single line by separating them with the command separator character, if your shell supports that (e.g. C<;> on many Unix shells; C<&> on the Windows NT C<cmd> shell). +Beginning with v5.6.0, Perl will attempt to flush all files opened for +output before starting the child process, but this may not be supported +on some platforms (see L<perlport>). To be safe, you may need to set +C<$|> ($AUTOFLUSH in English) or call the C<autoflush()> method of +C<IO::Handle> on any open handles. + Beware that some command shells may place restrictions on the length of the command line. You must ensure your strings don't exceed this limit after any necessary interpolations. See the platform-specific @@ -1041,29 +1075,30 @@ See L<"I/O Operators"> for more discussion. =item qw/STRING/ -Returns a list of the words extracted out of STRING, using embedded -whitespace as the word delimiters. It is exactly equivalent to +Evaluates to a list of the words extracted out of STRING, using embedded +whitespace as the word delimiters. It can be understood as being roughly +equivalent to: split(' ', q/STRING/); -This equivalency means that if used in scalar context, you'll get split's -(unfortunate) scalar context behavior, complete with mysterious warnings. -However do not rely on this as in a future release it could be changed to -be exactly equivalent to the list +the difference being that it generates a real list at compile time. So +this expression: - ('foo', 'bar', 'baz') + qw(foo bar baz) -Which in a scalar context would result in C<'baz'>. +is semantically equivalent to the list: + + 'foo', 'bar', 'baz' Some frequently seen examples: use POSIX qw( setlocale localeconv ) @EXPORT = qw( foo bar baz ); -A common mistake is to try to separate the words with comma or to put -comments into a multi-line C<qw>-string. For this reason the C<-w> -switch produce warnings if the STRING contains the "," or the "#" -character. 
+A common mistake is to try to separate the words with commas or to +put comments into a multi-line C<qw>-string. For this reason, the +C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable) +produce warnings if the STRING contains the "," or the "#" character. =item s/PATTERN/REPLACEMENT/egimosx @@ -1076,7 +1111,7 @@ variable is searched and modified. (The string specified with C<=~> must be scalar variable, an array element, a hash element, or an assignment to one of those, i.e., an lvalue.) -If the delimiter chosen is a single quote, no variable interpolation is +If the delimiter chosen is a single quote, no interpolation is done on either the PATTERN or the REPLACEMENT. Otherwise, if the PATTERN contains a $ that looks like a variable rather than an end-of-string test, the variable will be interpolated into the pattern @@ -1104,10 +1139,11 @@ Perl 4, Perl 5 treats backticks as normal delimiters; the replacement text is not evaluated as a command. If the PATTERN is delimited by bracketing quotes, the REPLACEMENT has its own pair of quotes, which may or may not be bracketing quotes, e.g., -C<s(foo)(bar)> or C<sE<lt>fooE<gt>/bar/>. A C</e> will cause the -replacement portion to be interpreted as a full-fledged Perl expression -and eval()ed right then and there. It is, however, syntax checked at -compile-time. +C<s(foo)(bar)> or C<< s<foo>/bar/ >>. A C</e> will cause the +replacement portion to be treated as a full-fledged Perl expression +and evaluated right then and there. It is, however, syntax checked at +compile-time. A second C<e> modifier will cause the replacement portion +to be C<eval>ed before being run as a Perl expression.
Examples: @@ -1134,8 +1170,12 @@ Examples: # symbolic dereferencing s/\$(\w+)/${$1}/g; - # /e's can even nest; this will expand - # any embedded scalar variable (including lexicals) in $_ + # Add one to the value of any numbers in the string + s/(\d+)/1 + $1/eg; + + # This will expand any embedded scalar variable + # (including lexicals) in $_ : First $1 is interpolated + # to the variable name, and then evaluated s/(\$\w+)/$1/eeg; # Delete (most) C comments. @@ -1155,23 +1195,21 @@ Examples: s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields Note the use of $ instead of \ in the last example. Unlike -B<sed>, we use the \E<lt>I<digit>E<gt> form in only the left hand side. -Anywhere else it's $E<lt>I<digit>E<gt>. +B<sed>, we use the \<I<digit>> form in only the left hand side. +Anywhere else it's $<I<digit>>. Occasionally, you can't use just a C</g> to get all the changes -to occur. Here are two common cases: +to occur that you might want. Here are two common cases: # put commas in the right places in an integer - 1 while s/(.*\d)(\d\d\d)/$1,$2/g; # perl4 - 1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/g; # perl5 + 1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/g; # expand tabs to 8-column spacing 1 while s/\t+/' ' x (length($&)*8 - length($`)%8)/e; +=item tr/SEARCHLIST/REPLACEMENTLIST/cdsUC -=item tr/SEARCHLIST/REPLACEMENTLIST/cds - -=item y/SEARCHLIST/REPLACEMENTLIST/cds +=item y/SEARCHLIST/REPLACEMENTLIST/cdsUC Transliterates all occurrences of the characters found in the search list with the corresponding character in the replacement list. It returns @@ -1199,15 +1237,17 @@ Options: c Complement the SEARCHLIST. d Delete found but unreplaced characters. s Squash duplicate replaced characters. - -If the C</c> modifier is specified, the SEARCHLIST character set is -complemented. If the C</d> modifier is specified, any characters specified -by SEARCHLIST not found in REPLACEMENTLIST are deleted. 
(Note -that this is slightly more flexible than the behavior of some B<tr> -programs, which delete anything they find in the SEARCHLIST, period.) -If the C</s> modifier is specified, sequences of characters that were -transliterated to the same character are squashed down to a single instance of the -character. + U Translate to/from UTF-8. + C Translate to/from 8-bit char (octet). + +If the C</c> modifier is specified, the SEARCHLIST character set +is complemented. If the C</d> modifier is specified, any characters +specified by SEARCHLIST not found in REPLACEMENTLIST are deleted. +(Note that this is slightly more flexible than the behavior of some +B<tr> programs, which delete anything they find in the SEARCHLIST, +period.) If the C</s> modifier is specified, sequences of characters +that were transliterated to the same character are squashed down +to a single instance of the character. If the C</d> modifier is used, the REPLACEMENTLIST is always interpreted exactly as specified. Otherwise, if the REPLACEMENTLIST is shorter @@ -1216,6 +1256,10 @@ enough. If the REPLACEMENTLIST is empty, the SEARCHLIST is replicated. This latter is useful for counting characters in a class or for squashing character sequences in a class. +The first C</U> or C</C> modifier applies to the left side of the translation. +The second one applies to the right side. If present, these modifiers override +the current utf8 state. + Examples: $ARGV[1] =~ tr/A-Z/a-z/; # canonicalize to lower case @@ -1235,16 +1279,20 @@ Examples: tr [\200-\377] [\000-\177]; # delete 8th bit -If multiple transliterations are given for a character, only the first one is used: + tr/\0-\xFF//CU; # change Latin-1 to Unicode + tr/\0-\x{FF}//UC; # change Unicode to Latin-1 + +If multiple transliterations are given for a character, only the +first one is used: tr/AAA/XYZ/ will transliterate any A to X. 
-Note that because the transliteration table is built at compile time, neither +Because the transliteration table is built at compile time, neither the SEARCHLIST nor the REPLACEMENTLIST are subjected to double quote -interpolation. That means that if you want to use variables, you must use -an eval(): +interpolation. That means that if you want to use variables, you +must use an eval(): eval "tr/$oldlist/$newlist/"; die $@ if $@; @@ -1255,52 +1303,52 @@ an eval(): =head2 Gory details of parsing quoted constructs -When presented with something which may have several different -interpretations, Perl uses the principle B<DWIM> (expanded to Do What I Mean -- not what I wrote) to pick up the most probable interpretation of the -source. This strategy is so successful that Perl users usually do not -suspect ambivalence of what they write. However, time to time Perl's ideas -differ from what the author meant. - -The target of this section is to clarify the Perl's way of interpreting -quoted constructs. The most frequent reason one may have to want to know the -details discussed in this section is hairy regular expressions. However, the -first steps of parsing are the same for all Perl quoting operators, so here -they are discussed together. - -The most important detail of Perl parsing rules is the first one -discussed below; when processing a quoted construct, Perl I<first> -finds the end of the construct, then it interprets the contents of the -construct. If you understand this rule, you may skip the rest of this -section on the first reading. The other rules would -contradict user's expectations much less frequently than the first one. - -Some of the passes discussed below are performed concurrently, but as -far as results are the same, we consider them one-by-one. For different -quoting constructs Perl performs different number of passes, from -one to five, but they are always performed in the same order. 
+When presented with something that might have several different +interpretations, Perl uses the B<DWIM> (that's "Do What I Mean") +principle to pick the most probable interpretation. This strategy +is so successful that Perl programmers often do not suspect the +ambivalence of what they write. But from time to time, Perl's +notions differ substantially from what the author honestly meant. + +This section hopes to clarify how Perl handles quoted constructs. +Although the most common reason to learn this is to unravel labyrinthine +regular expressions, because the initial steps of parsing are the +same for all quoting operators, they are all discussed together. + +The most important Perl parsing rule is the first one discussed +below: when processing a quoted construct, Perl first finds the end +of that construct, then interprets its contents. If you understand +this rule, you may skip the rest of this section on the first +reading. The other rules are likely to contradict the user's +expectations much less frequently than this first one. + +Some passes discussed below are performed concurrently, but because +their results are the same, we consider them individually. For different +quoting constructs, Perl performs different numbers of passes, from +one to five, but these passes are always performed in the same order. =over =item Finding the end -First pass is finding the end of the quoted construct, be it -a multichar delimiter -C<"\nEOF\n"> of C<<<EOF> construct, C</> which terminates C<qq/> construct, -C<]> which terminates C<qq[> construct, or C<E<gt>> which terminates a -fileglob started with C<<>. +The first pass is finding the end of the quoted construct, whether +it be a multicharacter delimiter C<"\nEOF\n"> in the C<<<EOF> +construct, a C</> that terminates a C<qq//> construct, a C<]> which +terminates C<qq[]> construct, or a C<< > >> which terminates a +fileglob started with C<< < >>. 
-When searching for one-char non-matching delimiter, such as C</>, combinations -C<\\> and C<\/> are skipped. When searching for one-char matching delimiter, -such as C<]>, combinations C<\\>, C<\]> and C<\[> are skipped, and -nested C<[>, C<]> are skipped as well. When searching for multichar delimiter -no skipping is performed. +When searching for single-character non-pairing delimiters, such +as C</>, combinations of C<\\> and C<\/> are skipped. However, +when searching for single-character pairing delimiter like C<[>, +combinations of C<\\>, C<\]>, and C<\[> are all skipped, and nested +C<[>, C<]> are skipped as well. When searching for multicharacter +delimiters, nothing is skipped. -For constructs with 3-part delimiters (C<s///> etc.) the search is -repeated once more. +For constructs with three-part delimiters (C<s///>, C<y///>, and +C<tr///>), the search is repeated once more. -During this search no attention is paid to the semantic of the construct, -thus: +During this search no attention is paid to the semantics of the construct. +Thus: "$hash{"$foo/$bar"}" @@ -1310,30 +1358,28 @@ or: bar # NOT a comment, this slash / terminated m//! /x -do not form legal quoted expressions, the quoted part ends on the first C<"> -and C</>, and the rest happens to be a syntax error. Note that since the slash -which terminated C<m//> was followed by a C<SPACE>, the above is not C<m//x>, -but rather C<m//> with no 'x' switch. So the embedded C<#> is interpreted -as a literal C<#>. +do not form legal quoted expressions. The quoted part ends on the +first C<"> and C</>, and the rest happens to be a syntax error. +Because the slash that terminated C<m//> was followed by a C<SPACE>, +the example above is not C<m//x>, but rather C<m//> with no C</x> +modifier. So the embedded C<#> is interpreted as a literal C<#>. 
=item Removal of backslashes before delimiters -During the second pass the text between the starting delimiter and -the ending delimiter is copied to a safe location, and the C<\> is -removed from combinations consisting of C<\> and delimiter(s) (both starting -and ending delimiter if they differ). - -The removal does not happen for multi-char delimiters. - -Note that the combination C<\\> is left as it was! +During the second pass, text between the starting and ending +delimiters is copied to a safe location, and the C<\> is removed +from combinations consisting of C<\> and delimiter--or delimiters, +meaning both starting and ending delimiters will be handled, should these differ. +This removal does not happen for multi-character delimiters. +Note that the combination C<\\> is left intact, just as it was. -Starting from this step no information about the delimiter(s) is used in the -parsing. +Starting from this step no information about the delimiters is +used in parsing. =item Interpolation -Next step is interpolation in the obtained delimiter-independent text. -There are four different cases. +The next step is interpolation in the text obtained, which is now +delimiter-independent. There are four different cases. =over @@ -1345,46 +1391,42 @@ No interpolation is performed. The only interpolation is removal of C<\> from pairs C<\\>. -=item C<"">, C<``>, C<qq//>, C<qx//>, C<<file*globE<gt>> - -C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> (possibly paired with C<\E>) are converted -to corresponding Perl constructs, thus C<"$foo\Qbaz$bar"> is converted to : - - $foo . (quotemeta("baz" . $bar)); +=item C<"">, C<``>, C<qq//>, C<qx//>, C<< <file*glob> >> -Other combinations of C<\> with following chars are substituted with -appropriate expansions. +C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> (possibly paired with C<\E>) are +converted to corresponding Perl constructs. Thus, C<"$foo\Qbaz$bar"> +is converted to C<$foo . (quotemeta("baz" . $bar))> internally.
+The other combinations are replaced with appropriate expansions. -Let it be stressed that I<whatever is between C<\Q> and C<\E>> is interpolated -in the usual way. Say, C<"\Q\\E"> has no C<\E> inside: it has C<\Q>, C<\\>, -and C<E>, thus the result is the same as for C<"\\\\E">. Generally speaking, -having backslashes between C<\Q> and C<\E> may lead to counterintuitive -results. So, C<"\Q\t\E"> is converted to: - - quotemeta("\t") - -which is the same as C<"\\\t"> (since TAB is not alphanumerical). Note also -that: +Let it be stressed that I<whatever falls between C<\Q> and C<\E>> +is interpolated in the usual way. Something like C<"\Q\\E"> has +no C<\E> inside. Instead, it has C<\Q>, C<\\>, and C<E>, so the +result is the same as for C<"\\\\E">. As a general rule, backslashes +between C<\Q> and C<\E> may lead to counterintuitive results. So, +C<"\Q\t\E"> is converted to C<quotemeta("\t")>, which is the same +as C<"\\\t"> (since TAB is not alphanumeric). Note also that: $str = '\t'; return "\Q$str"; may be closer to the conjectural I<intention> of the writer of C<"\Q\t\E">. -Interpolated scalars and arrays are internally converted to the C<join> and -C<.> Perl operations, thus C<"$foo >>> '@arr'"> becomes: +Interpolated scalars and arrays are converted internally to the C<join> and +C<.> catenation operations. Thus, C<"$foo XXX '@arr'"> becomes: - $foo . " >>> '" . (join $", @arr) . "'"; + $foo . " XXX '" . (join $", @arr) . "'"; -All the operations in the above are performed simultaneously left-to-right. +All operations above are performed simultaneously, left to right. -Since the result of "\Q STRING \E" has all the metacharacters quoted -there is no way to insert a literal C<$> or C<@> inside a C<\Q\E> pair: if -protected by C<\> C<$> will be quoted to became "\\\$", if not, it is -interpreted as starting an interpolated scalar.
+Because the result of C<"\Q STRING \E"> has all metacharacters +quoted, there is no way to insert a literal C<$> or C<@> inside a +C<\Q\E> pair. If protected by C<\>, C<$> will be quoted to become +C<"\\\$">; if not, it is interpreted as the start of an interpolated +scalar. -Note also that the interpolating code needs to make a decision on where the -interpolated scalar ends. For instance, whether C<"a $b -E<gt> {c}"> means: +Note also that the interpolation code needs to make a decision on +where the interpolated scalar ends. For instance, whether +C<< "a $b -> {c}" >> really means: "a " . $b . " -> {c}"; @@ -1392,99 +1434,108 @@ or: "a " . $b -> {c}; -I<Most of the time> the decision is to take the longest possible text which -does not include spaces between components and contains matching -braces/brackets. Since the outcome may be determined by I<voting> based -on heuristic estimators, the result I<is not strictly predictable>, but -is usually correct for the ambiguous cases. +Most of the time, the decision is to take the longest possible text that does not include +spaces between components and which contains matching braces or +brackets. Because the outcome may be determined by voting based +on heuristic estimators, the result is not strictly predictable. +Fortunately, it's usually correct for ambiguous cases. =item C<?RE?>, C</RE/>, C<m/RE/>, C<s/RE/foo/>, -Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> and interpolation happens -(almost) as with C<qq//> constructs, but I<the substitution of C<\> followed by -RE-special chars (including C<\>) is not performed>! Moreover, -inside C<(?{BLOCK})>, C<(?# comment )>, and C<#>-comment of -C<//x>-regular expressions no processing is performed at all. -This is the first step where presence of the C<//x> switch is relevant.
- -Interpolation has several quirks: C<$|>, C<$(> and C<$)> are not interpolated, and -constructs C<$var[SOMETHING]> are I<voted> (by several different estimators) -to be an array element or C<$var> followed by a RE alternative. This is -the place where the notation C<${arr[$bar]}> comes handy: C</${arr[0-9]}/> -is interpreted as an array element C<-9>, not as a regular expression from -variable C<$arr> followed by a digit, which is the interpretation of -C</$arr[0-9]/>. Since voting among different estimators may be performed, -the result I<is not predictable>. - -It is on this step that C<\1> is converted to C<$1> in the replacement -text of C<s///>. - -Note that absence of processing of C<\\> creates specific restrictions on the -post-processed text: if the delimiter is C</>, one cannot get the combination -C<\/> into the result of this step: C</> will finish the regular expression, -C<\/> will be stripped to C</> on the previous step, and C<\\/> will be left -as is. Since C</> is equivalent to C<\/> inside a regular expression, this -does not matter unless the delimiter is a special character for the RE engine, -as in C<s*foo*bar*>, C<m[foo]>, or C<?foo?>, or an alphanumeric char, as in: +Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, and interpolation +happens (almost) as with C<qq//> constructs, but the substitution +of C<\> followed by RE-special chars (including C<\>) is not +performed. Moreover, inside C<(?{BLOCK})>, C<(?# comment )>, and +a C<#>-comment in a C<//x>-regular expression, no processing is +performed whatsoever. This is the first step at which the presence +of the C<//x> modifier is relevant. + +Interpolation has several quirks: C<$|>, C<$(>, and C<$)> are not +interpolated, and constructs C<$var[SOMETHING]> are voted (by several +different estimators) to be either an array element or C<$var> +followed by an RE alternative. 
This is where the notation +C<${arr[$bar]}> comes in handy: C</${arr[0-9]}/> is interpreted as +array element C<-9>, not as a regular expression from the variable +C<$arr> followed by a digit, which would be the interpretation of +C</$arr[0-9]/>. Since voting among different estimators may occur, +the result is not predictable. + +It is at this step that C<\1> is begrudgingly converted to C<$1> in +the replacement text of C<s///> to correct the incorrigible +I<sed> hackers who haven't picked up the saner idiom yet. A warning +is emitted if the C<use warnings> pragma or the B<-w> command-line flag +(that is, the C<$^W> variable) was set. + +The lack of processing of C<\\> creates specific restrictions on +the post-processed text. If the delimiter is C</>, one cannot get +the combination C<\/> into the result of this step. C</> will +finish the regular expression, C<\/> will be stripped to C</> on +the previous step, and C<\\/> will be left as is. Because C</> is +equivalent to C<\/> inside a regular expression, this does not +matter unless the delimiter happens to be a character special to the +RE engine, such as in C<s*foo*bar*>, C<m[foo]>, or C<?foo?>; or an +alphanumeric char, as in: m m ^ a \s* b mmx; -In the above RE, which is intentionally obfuscated for illustration, the +In the RE above, which is intentionally obfuscated for illustration, the delimiter is C<m>, the modifier is C<mx>, and after backslash-removal the -RE is the same as for C<m/ ^ a s* b /mx>). +RE is the same as for C<m/ ^ a s* b /mx>. There's more than one +reason you're encouraged to restrict your delimiters to non-alphanumeric, +non-whitespace choices. =back -This step is the last one for all the constructs except regular expressions, +This step is the last one for all constructs except regular expressions, which are processed further.
=item Interpolation of regular expressions -All the previous steps were performed during the compilation of Perl code, -this one happens in run time (though it may be optimized to be calculated -at compile time if appropriate). After all the preprocessing performed -above (and possibly after evaluation if catenation, joining, up/down-casing -and C<quotemeta()>ing are involved) the resulting I<string> is passed to RE -engine for compilation. - -Whatever happens in the RE engine is better be discussed in L<perlre>, -but for the sake of continuity let us do it here. - -This is another step where presence of the C<//x> switch is relevant. -The RE engine scans the string left-to-right, and converts it to a finite -automaton. - -Backslashed chars are either substituted by corresponding literal -strings (as with C<\{>), or generate special nodes of the finite automaton -(as with C<\b>). Characters which are special to the RE engine (such as -C<|>) generate corresponding nodes or groups of nodes. C<(?#...)> -comments are ignored. All the rest is either converted to literal strings -to match, or is ignored (as is whitespace and C<#>-style comments if -C<//x> is present). - -Note that the parsing of the construct C<[...]> is performed using -rather different rules than for the rest of the regular expression. -The terminator of this construct is found using the same rules as for -finding a terminator of a C<{}>-delimited construct, the only exception -being that C<]> immediately following C<[> is considered as if preceded -by a backslash. Similarly, the terminator of C<(?{...})> is found using -the same rules as for finding a terminator of a C<{}>-delimited construct. - -It is possible to inspect both the string given to RE engine, and the -resulting finite automaton. See arguments C<debug>/C<debugcolor> -of C<use L<re>> directive, and/or B<-Dr> option of Perl in -L<perlrun/Switches>. 
+Previous steps were performed during the compilation of Perl code, +but this one happens at run time--although it may be optimized to +be calculated at compile time if appropriate. After preprocessing +described above, and possibly after evaluation if catenation, +joining, casing translation, or metaquoting are involved, the +resulting I<string> is passed to the RE engine for compilation. + +Whatever happens in the RE engine might be better discussed in L<perlre>, +but for the sake of continuity, we shall do so here. + +This is another step where the presence of the C<//x> modifier is +relevant. The RE engine scans the string from left to right and +converts it to a finite automaton. + +Backslashed characters are either replaced with corresponding +literal strings (as with C<\{>), or else they generate special nodes +in the finite automaton (as with C<\b>). Characters special to the +RE engine (such as C<|>) generate corresponding nodes or groups of +nodes. C<(?#...)> comments are ignored. All the rest is either +converted to literal strings to match, or else is ignored (as is +whitespace and C<#>-style comments if C<//x> is present). + +Parsing of the bracketed character class construct, C<[...]>, is +rather different than the rule used for the rest of the pattern. +The terminator of this construct is found using the same rules as +for finding the terminator of a C<{}>-delimited construct, the only +exception being that C<]> immediately following C<[> is treated as +though preceded by a backslash. Similarly, the terminator of +C<(?{...})> is found using the same rules as for finding the +terminator of a C<{}>-delimited construct. + +It is possible to inspect both the string given to RE engine and the +resulting finite automaton. See the arguments C<debug>/C<debugcolor> +in the C<use L<re>> pragma, as well as Perl's B<-Dr> command-line +switch documented in L<perlrun/"Command Switches">. 
=item Optimization of regular expressions This step is listed for completeness only. Since it does not change semantics, details of this step are not documented and are subject -to change. This step is performed over the finite automaton generated -during the previous pass. +to change without notice. This step is performed over the finite +automaton that was generated during the previous pass. -However, in older versions of Perl C<L<split>> used to silently -optimize C</^/> to mean C</^/m>. This behaviour, though present -in current versions of Perl, may be deprecated in future. +It is at this stage that C<split()> silently optimizes C</^/> to +mean C</^/m>. =back @@ -1493,39 +1544,40 @@ in current versions of Perl, may be deprecated in future. There are several I/O operators you should know about. A string enclosed by backticks (grave accents) first undergoes -variable substitution just like a double quoted string. It is then -interpreted as a command, and the output of that command is the value -of the pseudo-literal, like in a shell. In scalar context, a single -string consisting of all the output is returned. In list context, -a list of values is returned, one for each line of output. (You can -set C<$/> to use a different line terminator.) The command is executed +double-quote interpolation. It is then interpreted as an external +command, and the output of that command is the value of the +pseudo-literal, like in a shell. In scalar context, a single +string consisting of all output is returned. In list context, a +list of values is returned, one per line of output. (You can set +C<$/> to use a different line terminator.) The command is executed each time the pseudo-literal is evaluated. The status value of the command is returned in C<$?> (see L<perlvar> for the interpretation of C<$?>). Unlike in B<csh>, no translation is done on the return data--newlines remain newlines. Unlike in any of the shells, single quotes do not hide variable names in the command from interpretation.
-To pass a $ through to the shell you need to hide it with a backslash. -The generalized form of backticks is C<qx//>. (Because backticks -always undergo shell expansion as well, see L<perlsec> for -security concerns.) - -In a scalar context, evaluating a filehandle in angle brackets yields the -next line from that file (newline, if any, included), or C<undef> at -end-of-file. When C<$/> is set to C<undef> (i.e. file slurp mode), -and the file is empty, it returns C<''> the first time, followed by -C<undef> subsequently. - -Ordinarily you must assign the returned value to a variable, but there is one -situation where an automatic assignment happens. I<If and ONLY if> the -input symbol is the only thing inside the conditional of a C<while> or -C<for(;;)> loop, the value is automatically assigned to the variable -C<$_>. In these loop constructs, the assigned value (whether assignment -is automatic or explicit) is then tested to see if it is defined. -The defined test avoids problems where line has a string value -that would be treated as false by perl e.g. "" or "0" with no trailing -newline. (This may seem like an odd thing to you, but you'll use the -construct in almost every Perl script you write.) Anyway, the following -lines are equivalent to each other: +To pass a literal dollar-sign through to the shell you need to hide +it with a backslash. The generalized form of backticks is C<qx//>. +(Because backticks always undergo shell expansion as well, see +L<perlsec> for security concerns.) + +In scalar context, evaluating a filehandle in angle brackets yields +the next line from that file (the newline, if any, included), or +C<undef> at end-of-file or on error. When C<$/> is set to C<undef> +(sometimes known as file-slurp mode) and the file is empty, it +returns C<''> the first time, followed by C<undef> subsequently. + +Ordinarily you must assign the returned value to a variable, but +there is one situation where an automatic assignment happens. 
If +and only if the input symbol is the only thing inside the conditional +of a C<while> statement (even if disguised as a C<for(;;)> loop), +the value is automatically assigned to the global variable $_, +destroying whatever was there previously. (This may seem like an +odd thing to you, but you'll use the construct in almost every Perl +script you write.) The $_ variable is not implicitly localized. +You'll have to put a C<local $_;> before the loop if you want that +to happen. + +The following lines are equivalent: while (defined($_ = <STDIN>)) { print; } while ($_ = <STDIN>) { print; } @@ -1535,37 +1587,44 @@ lines are equivalent to each other: print while ($_ = <STDIN>); print while <STDIN>; -and this also behaves similarly, but avoids the use of $_ : +This also behaves similarly, but avoids $_ : while (my $line = <STDIN>) { print $line } -If you really mean such values to terminate the loop they should be -tested for explicitly: +In these loop constructs, the assigned value (whether assignment +is automatic or explicit) is then tested to see whether it is +defined. The defined test avoids problems where the line has a string +value that would be treated as false by Perl, for example a "" or +a "0" with no trailing newline. If you really mean for such values +to terminate the loop, they should be tested for explicitly: while (($_ = <STDIN>) ne '0') { ... } while (<STDIN>) { last unless $_; ... } -In other boolean contexts, C<E<lt>I<filehandle>E<gt>> without explicit C<defined> -test or comparison will solicit a warning if C<-w> is in effect. +In other boolean contexts, C<< <I<filehandle>> >> without an +explicit C<defined> test or comparison elicits a warning if the +C<use warnings> pragma or the B<-w> +command-line switch (the C<$^W> variable) is in effect. The filehandles STDIN, STDOUT, and STDERR are predefined. 
(The -filehandles C<stdin>, C<stdout>, and C<stderr> will also work except in -packages, where they would be interpreted as local identifiers rather -than global.) Additional filehandles may be created with the open() -function. See L<perlfunc/open> for details on this. - -If a E<lt>FILEHANDLEE<gt> is used in a context that is looking for a list, a -list consisting of all the input lines is returned, one line per list -element. It's easy to make a I<LARGE> data space this way, so use with -care. - -E<lt>FILEHANDLEE<gt> may also be spelt readline(FILEHANDLE). See -L<perlfunc/readline>. - -The null filehandle E<lt>E<gt> is special and can be used to emulate the -behavior of B<sed> and B<awk>. Input from E<lt>E<gt> comes either from +filehandles C<stdin>, C<stdout>, and C<stderr> will also work except +in packages, where they would be interpreted as local identifiers +rather than global.) Additional filehandles may be created with +the open() function, amongst others. See L<perlopentut> and +L<perlfunc/open> for details on this. + +If a <FILEHANDLE> is used in a context that is looking for +a list, a list comprising all input lines is returned, one line per +list element. It's easy to grow to a rather large data space this +way, so use with care. + +<FILEHANDLE> may also be spelled C<readline(*FILEHANDLE)>. +See L<perlfunc/readline>. + +The null filehandle <> is special: it can be used to emulate the +behavior of B<sed> and B<awk>. Input from <> comes either from standard input, or from each file listed on the command line. Here's -how it works: the first time E<lt>E<gt> is evaluated, the @ARGV array is +how it works: the first time <> is evaluated, the @ARGV array is checked, and if it is empty, C<$ARGV[0]> is set to "-", which when opened gives you standard input. The @ARGV array is then processed as a list of filenames. 
The loop @@ -1584,16 +1643,17 @@ is equivalent to the following Perl-like pseudo code: } } -except that it isn't so cumbersome to say, and will actually work. It -really does shift array @ARGV and put the current filename into variable -$ARGV. It also uses filehandle I<ARGV> internally--E<lt>E<gt> is just a -synonym for E<lt>ARGVE<gt>, which is magical. (The pseudo code above -doesn't work because it treats E<lt>ARGVE<gt> as non-magical.) +except that it isn't so cumbersome to say, and will actually work. +It really does shift the @ARGV array and put the current filename +into the $ARGV variable. It also uses filehandle I<ARGV> +internally--<> is just a synonym for <ARGV>, which +is magical. (The pseudo code above doesn't work because it treats +<ARGV> as non-magical.) -You can modify @ARGV before the first E<lt>E<gt> as long as the array ends up +You can modify @ARGV before the first <> as long as the array ends up containing the list of filenames you really want. Line numbers (C<$.>) -continue as if the input were one big happy file. (But see example -under C<eof> for how to reset line numbers on each file.) +continue as though the input were one big happy file. See the example +in L<perlfunc/eof> for how to reset line numbers on each file. If you want to set @ARGV to your own list of files, go right ahead. This sets @ARGV to all plain text files if no @ARGV was given: @@ -1620,13 +1680,14 @@ Getopts modules or put a loop on the front like this: # ... # code for each line } -The E<lt>E<gt> symbol will return C<undef> for end-of-file only once. -If you call it again after this it will assume you are processing another -@ARGV list, and if you haven't set @ARGV, will input from STDIN. +The <> symbol will return C<undef> for end-of-file only once. +If you call it again after this, it will assume you are processing another +@ARGV list, and if you haven't set @ARGV, will read input from STDIN. 
-If the string inside the angle brackets is a reference to a scalar -variable (e.g., E<lt>$fooE<gt>), then that variable contains the name of the -filehandle to input from, or its typeglob, or a reference to the same. For example: +If what the angle brackets contain is a simple scalar variable (e.g., +<$foo>), then that variable contains the name of the +filehandle to input from, or its typeglob, or a reference to the +same. For example: $fh = \*STDIN; $line = <$fh>; @@ -1635,25 +1696,25 @@ If what's within the angle brackets is neither a filehandle nor a simple scalar variable containing a filehandle name, typeglob, or typeglob reference, it is interpreted as a filename pattern to be globbed, and either a list of filenames or the next filename in the list is returned, -depending on context. This distinction is determined on syntactic -grounds alone. That means C<E<lt>$xE<gt>> is always a readline from -an indirect handle, but C<E<lt>$hash{key}E<gt>> is always a glob. +depending on context. This distinction is determined on syntactic +grounds alone. That means C<< <$x> >> is always a readline() from +an indirect handle, but C<< <$hash{key}> >> is always a glob(). That's because $x is a simple scalar variable, but C<$hash{key}> is not--it's a hash element. One level of double-quote interpretation is done first, but you can't -say C<E<lt>$fooE<gt>> because that's an indirect filehandle as explained +say C<< <$foo> >> because that's an indirect filehandle as explained in the previous paragraph. (In older versions of Perl, programmers would insert curly brackets to force interpretation as a filename glob: -C<E<lt>${foo}E<gt>>. These days, it's considered cleaner to call the +C<< <${foo}> >>. These days, it's considered cleaner to call the internal function directly as C<glob($foo)>, which is probably the right -way to have done it in the first place.) 
For example: while (<*.c>) { chmod 0644, $_; } -is equivalent to +is roughly equivalent to: open(FOO, "echo *.c | tr -s ' \t\r\f' '\\012\\012\\012\\012'|"); while (<FOO>) { @@ -1661,27 +1722,22 @@ is equivalent to chmod 0644, $_; } -In fact, it's currently implemented that way. (Which means it will not -work on filenames with spaces in them unless you have csh(1) on your -machine.) Of course, the shortest way to do the above is: +except that the globbing is actually done internally using the standard +C<File::Glob> extension. Of course, the shortest way to do the above is: chmod 0644, <*.c>; -Because globbing invokes a shell, it's often faster to call readdir() yourself -and do your own grep() on the filenames. Furthermore, due to its current -implementation of using a shell, the glob() routine may get "Arg list too -long" errors (unless you've installed tcsh(1L) as F</bin/csh>). - -A glob evaluates its (embedded) argument only when it is starting a new -list. All values must be read before it will start over. In a list -context this isn't important, because you automatically get them all -anyway. In scalar context, however, the operator returns the next value -each time it is called, or a C<undef> value if you've just run out. As -for filehandles an automatic C<defined> is generated when the glob -occurs in the test part of a C<while> or C<for> - because legal glob returns -(e.g. a file called F<0>) would otherwise terminate the loop. -Again, C<undef> is returned only once. So if you're expecting a single value -from a glob, it is much better to say +A (file)glob evaluates its (embedded) argument only when it is +starting a new list. All values must be read before it will start +over. In list context, this isn't important because you automatically +get them all anyway. However, in scalar context the operator returns +the next value each time it's called, or C<undef> when the list has +run out. 
As with filehandle reads, an automatic C<defined> is +generated when the glob occurs in the test part of a C<while>, +because legal glob returns (e.g. a file called F<0>) would otherwise +terminate the loop. Again, C<undef> is returned only once. So if +you're expecting a single value from a glob, it is much better to +say ($file) = <blurch*>; @@ -1690,7 +1746,7 @@ than $file = <blurch*>; because the latter will alternate between returning a filename and -returning FALSE. +returning false. If you're trying to do variable interpolation, it's definitely better to use the glob() function, because the older notation can cause people @@ -1702,10 +1758,10 @@ to become confused with the indirect filehandle notation. =head2 Constant Folding Like C, Perl does a certain amount of expression evaluation at -compile time, whenever it determines that all arguments to an +compile time whenever it determines that all arguments to an operator are static and have no side effects. In particular, string concatenation happens at compile time between literals that don't do -variable substitution. Backslash interpretation also happens at +variable substitution. Backslash interpolation also happens at compile time. You can say 'Now is the time for all' . "\n" . @@ -1718,20 +1774,20 @@ you say if (-s $file > 5 + 100 * 2**16) { } } -the compiler will precompute the number that -expression represents so that the interpreter -won't have to. +the compiler will precompute the number which that expression +represents so that the interpreter won't have to. =head2 Bitwise String Operators Bitstrings of any size may be manipulated by the bitwise operators (C<~ | & ^>). -If the operands to a binary bitwise op are strings of different sizes, -B<|> and B<^> ops will act as if the shorter operand had additional -zero bits on the right, while the B<&> op will act as if the longer -operand were truncated to the length of the shorter. 
Note that the -granularity for such extension or truncation is one or more I<bytes>. +If the operands to a binary bitwise op are strings of different +sizes, B<|> and B<^> ops act as though the shorter operand had +additional zero bits on the right, while the B<&> op acts as though +the longer operand were truncated to the length of the shorter. +The granularity for such extension or truncation is one or more +bytes. # ASCII-based examples print "j p \n" ^ " a h"; # prints "JAPH\n" @@ -1739,9 +1795,9 @@ granularity for such extension or truncation is one or more I<bytes>. print "japh\nJunk" & '_____'; # prints "JAPH\n"; print 'p N$' ^ " E<H\n"; # prints "Perl\n"; -If you are intending to manipulate bitstrings, you should be certain that +If you are intending to manipulate bitstrings, be certain that you're supplying bitstrings: If an operand is a number, that will imply -a B<numeric> bitwise operation. You may explicitly show which type of +a B<numeric> bitwise operation. You may explicitly show which type of operation you intend by using C<""> or C<0+>, as in the examples below. $foo = 150 | 105 ; # yields 255 (0x96 | 0x69 is 0xFF) @@ -1757,33 +1813,39 @@ in a bit vector. =head2 Integer Arithmetic -By default Perl assumes that it must do most of its arithmetic in +By default, Perl assumes that it must do most of its arithmetic in floating point. But by saying use integer; you may tell the compiler that it's okay to use integer operations -from here to the end of the enclosing BLOCK. An inner BLOCK may -countermand this by saying +(if it feels like it) from here to the end of the enclosing BLOCK. +An inner BLOCK may countermand this by saying no integer; -which lasts until the end of that BLOCK. - -The bitwise operators ("&", "|", "^", "~", "<<", and ">>") always -produce integral results. (But see also L<Bitwise String Operators>.) -However, C<use integer> still has meaning -for them. By default, their results are interpreted as unsigned -integers. 
However, if C<use integer> is in effect, their results are -interpreted as signed integers. For example, C<~0> usually evaluates -to a large integral value. However, C<use integer; ~0> is -1 on twos-complement machines. +which lasts until the end of that BLOCK. Note that this doesn't +mean everything is only an integer, merely that Perl may use integer +operations if it is so inclined. For example, even under C<use +integer>, if you take the C<sqrt(2)>, you'll still get C<1.4142135623731> +or so. + +Used on numbers, the bitwise operators ("&", "|", "^", "~", "<<", +and ">>") always produce integral results. (But see also L<Bitwise +String Operators>.) However, C<use integer> still has meaning for +them. By default, their results are interpreted as unsigned integers, but +if C<use integer> is in effect, their results are interpreted +as signed integers. For example, C<~0> usually evaluates to a large +integral value. However, C<use integer; ~0> is C<-1> on twos-complement +machines. =head2 Floating-point Arithmetic While C<use integer> provides integer-only arithmetic, there is no -similar ways to provide rounding or truncation at a certain number of -decimal places. For rounding to a certain number of digits, sprintf() -or printf() is usually the easiest route. +analogous mechanism to provide automatic rounding or truncation to a +certain number of decimal places. For rounding to a certain number +of digits, sprintf() or printf() is usually the easiest route. +See L<perlfaq4>. Floating-point numbers are only approximations to what a mathematician would call real numbers. There are infinitely more reals than floats, @@ -1807,10 +1869,10 @@ this topic. } The POSIX module (part of the standard perl distribution) implements -ceil(), floor(), and a number of other mathematical and trigonometric -functions. The Math::Complex module (part of the standard perl -distribution) defines a number of mathematical functions that can also -work on real numbers. 
Math::Complex not as efficient as POSIX, but +ceil(), floor(), and other mathematical and trigonometric functions. +The Math::Complex module (part of the standard perl distribution) +defines mathematical functions that work on both the reals and the +imaginary numbers. Math::Complex not as efficient as POSIX, but POSIX can't work with complex numbers. Rounding in financial applications can have serious implications, and @@ -1822,13 +1884,19 @@ need yourself. =head2 Bigger Numbers The standard Math::BigInt and Math::BigFloat modules provide -variable precision arithmetic and overloaded operators. -At the cost of some space and considerable speed, they -avoid the normal pitfalls associated with limited-precision -representations. +variable-precision arithmetic and overloaded operators, although +they're currently pretty slow. At the cost of some space and +considerable speed, they avoid the normal pitfalls associated with +limited-precision representations. use Math::BigInt; $x = Math::BigInt->new('123456789123456789'); print $x * $x; # prints +15241578780673678515622620750190521 + +The non-standard modules SSLeay::BN and Math::Pari provide +equivalent functionality (and much more) with a substantial +performance savings. + +=cut diff --git a/contrib/perl5/pod/perlopentut.pod b/contrib/perl5/pod/perlopentut.pod index 6e6091a..9cb9f67 100644 --- a/contrib/perl5/pod/perlopentut.pod +++ b/contrib/perl5/pod/perlopentut.pod @@ -73,7 +73,7 @@ from a different file, and forget to trim it before opening: This is not a bug, but a feature. Because C<open> mimics the shell in its style of using redirection arrows to specify how to open the file, it also does so with respect to extra white space around the filename itself -as well. For accessing files with naughty names, see L</"Dispelling +as well. For accessing files with naughty names, see L<"Dispelling the Dweomer">. =head2 Pipe Opens @@ -84,7 +84,7 @@ C<popen> function. 
But in the shell, you just use a different redirection character. That's also the case for Perl. The C<open> call remains the same--just its argument differs. -If the leading character is a pipe symbol, C<open) starts up a new +If the leading character is a pipe symbol, C<open> starts up a new command and opens a write-only filehandle leading into that command. This lets you write into that handle and have what you write show up on that command's standard input. For example: @@ -123,9 +123,9 @@ special way. If you open minus for reading, it really means to access the standard input. If you open minus for writing, it really means to access the standard output. -If minus can be used as the default input or default output? What happens +If minus can be used as the default input or default output, what happens if you open a pipe into or out of minus? What's the default command it -would run? The same script as you're current running! This is actually +would run? The same script as you're currently running! This is actually a stealth C<fork> hidden inside an C<open> call. See L<perlipc/"Safe Pipe Opens"> for details. @@ -175,7 +175,7 @@ L<perlfaq5> for more details. One of the most common uses for C<open> is one you never even notice. When you process the ARGV filehandle using -C<E<lt>ARGVE<gt>>, Perl actually does an implicit open +C<< <ARGV> >>, Perl actually does an implicit open on each file in @ARGV. Thus a program called like this: $ myprogram file1 file2 file3 @@ -189,7 +189,7 @@ using a construct no more complex than: If @ARGV is empty when the loop first begins, Perl pretends you've opened up minus, that is, the standard input. In fact, $ARGV, the currently -open file during C<E<lt>ARGVE<gt>> processing, is even set to "-" +open file during C<< <ARGV> >> processing, is even set to "-" in these circumstances. 
You are welcome to pre-process your @ARGV before starting the loop to @@ -239,7 +239,7 @@ Here's an example: or die "can't open $pwdinfo: $!"; This sort of thing also comes into play in filter processing. Because -C<E<lt>ARGVE<gt>> processing employs the normal, shell-style Perl C<open>, +C<< <ARGV> >> processing employs the normal, shell-style Perl C<open>, it respects all the special things we've already seen: $ myprogram f1 "cmd1|" - f2 "cmd2|" f3 < tmpfile @@ -264,7 +264,7 @@ you can fetch URLs before processing them: @ARGV = map { m#^\w+://# ? "GET $_ |" : $_ } @ARGV; -It's not for nothing that this is called magic C<E<lt>ARGVE<gt>>. +It's not for nothing that this is called magic C<< <ARGV> >>. Pretty nifty, eh? =head1 Open E<agrave> la C @@ -303,11 +303,13 @@ from the Fcntl module, which supplies the following standard flags: O_TRUNC Truncate the file O_NONBLOCK Non-blocking access -Less common flags that are sometimes available on some operating systems -include C<O_BINARY>, C<O_TEXT>, C<O_SHLOCK>, C<O_EXLOCK>, C<O_DEFER>, -C<O_SYNC>, C<O_ASYNC>, C<O_DSYNC>, C<O_RSYNC>, C<O_NOCTTY>, C<O_NDELAY> -and C<O_LARGEFILE>. Consult your open(2) manpage or its local equivalent -for details. +Less common flags that are sometimes available on some operating +systems include C<O_BINARY>, C<O_TEXT>, C<O_SHLOCK>, C<O_EXLOCK>, +C<O_DEFER>, C<O_SYNC>, C<O_ASYNC>, C<O_DSYNC>, C<O_RSYNC>, +C<O_NOCTTY>, C<O_NDELAY> and C<O_LARGEFILE>. Consult your open(2) +manpage or its local equivalent for details. (Note: starting from +Perl release 5.6 the O_LARGEFILE flag, if available, is automatically +added to the sysopen() flags because large files are the default.) Here's how to use C<sysopen> to emulate the simple C<open> calls we had before. We'll omit the C<|| die $!> checks for clarity, but make sure @@ -391,7 +393,7 @@ folders, cookie files, and internal temporary files. 
Sometimes you already have a filehandle open, and want to make another handle that's a duplicate of the first one. In the shell, we place an ampersand in front of a file descriptor number when doing redirections. -For example, C<2E<gt>&1> makes descriptor 2 (that's STDERR in Perl) +For example, C<< 2>&1 >> makes descriptor 2 (that's STDERR in Perl) be redirected into descriptor 1 (which is usually Perl's STDOUT). The same is essentially true in Perl: a filename that begins with an ampersand is treated instead as a file descriptor if a number, or as a @@ -442,8 +444,8 @@ these days. Here's an example of that: $fd = $ENV{"MHCONTEXTFD"}; open(MHCONTEXT, "<&=$fd") or die "couldn't fdopen $fd: $!"; -If you're using magic C<E<lt>ARGVE<gt>>, you could even pass in as a -command line argument in @ARGV something like C<"E<lt>&=$MHCONTEXTFD">, +If you're using magic C<< <ARGV> >>, you could even pass in as a +command line argument in @ARGV something like C<"<&=$MHCONTEXTFD">, but we've never seen anyone actually do this. =head2 Dispelling the Dweomer @@ -472,7 +474,7 @@ The only vaguely popular system that doesn't work this way is the proprietary Macintosh system, which uses a colon where the rest of us use a slash. Maybe C<sysopen> isn't such a bad idea after all. -If you want to use C<E<lt>ARGVE<gt>> processing in a totally boring +If you want to use C<< <ARGV> >> processing in a totally boring and non-magical way, you could do this first: # "Sam sat on the ground and put his head in his hands. @@ -494,7 +496,7 @@ to mean standard input, per the standard convention. You've probably noticed how Perl's C<warn> and C<die> functions can produce messages like: - Some warning at scriptname line 29, <FH> chunk 7. + Some warning at scriptname line 29, <FH> line 7. That's because you opened a filehandle FH, and had read in seven records from it. But what was the name of the file, not the handle? 
@@ -510,7 +512,7 @@ temporarily, then all you have to do is this: Since you're using the pathname of the file as its handle, you'll get warnings more like - Some warning at scriptname line 29, </etc/motd> chunk 7. + Some warning at scriptname line 29, </etc/motd> line 7. =head2 Single Argument Open @@ -694,7 +696,7 @@ the doctor ordered. There's no filehandle interface, but it's still easy to get the contents of a document: use LWP::Simple; - $doc = get('http://www.sn.no/libwww-perl/'); + $doc = get('http://www.linpro.no/lwp/'); =head2 Binary Files diff --git a/contrib/perl5/pod/perlpod.pod b/contrib/perl5/pod/perlpod.pod index 7fa8290..6c0c534 100644 --- a/contrib/perl5/pod/perlpod.pod +++ b/contrib/perl5/pod/perlpod.pod @@ -11,7 +11,6 @@ L<verbatim|/"Verbatim Paragraph">, L<command|/"Command Paragraph">, and L<ordinary text|/"Ordinary Block of Text">. - =head2 Verbatim Paragraph A verbatim paragraph, distinguished by being indented (that is, @@ -20,7 +19,6 @@ with tabs assumed to be on 8-column boundaries. There are no special formatting escapes, so you can't italicize or anything like that. A \ means \, and nothing else. - =head2 Command Paragraph All command paragraphs start with "=", followed by an @@ -75,7 +73,6 @@ or use "=item 1.", "=item 2.", etc., to produce numbered lists, or use or numbers. If you start with bullets or numbers, stick with them, as many formatters use the first "=item" type to decide how to format the list. - =item =for =item =begin @@ -149,20 +146,19 @@ Some examples of lists include: =back - =back - =head2 Ordinary Block of Text It will be filled, and maybe even justified. 
Certain interior sequences are recognized both here and in commands: - I<text> italicize text, used for emphasis or variables - B<text> embolden text, used for switches and programs - S<text> text contains non-breaking spaces - C<code> literal code + I<text> Italicize text, used for emphasis or variables + B<text> Embolden text, used for switches and programs + S<text> Text contains non-breaking spaces + C<code> Render code in a typewriter font, or give some other + indication that this represents program text L<name> A link (cross reference) to name L<name> manual page L<name/ident> item in manual page @@ -178,7 +174,7 @@ here and in commands: L<text|name/"sec"> L<text|"sec"> L<text|/"sec"> - + F<file> Used for filenames X<index> An index entry Z<> A zero-width character @@ -193,6 +189,40 @@ here and in commands: E<html> Some non-numeric HTML entity, such as E<Agrave> +Most of the time, you will only need a single set of angle brackets to +delimit the beginning and end of interior sequences. However, sometimes +you will want to put a right angle bracket (or greater-than sign '>') +inside of a sequence. This is particularly common when using a sequence +to provide a different font-type for a snippet of code. As with all +things in Perl, there is more than one way to do it. One way is to +simply escape the closing bracket using an C<E> sequence: + + C<$a E<lt>=E<gt> $b> + +This will produce: "C<$a E<lt>=E<gt> $b>" + +A more readable, and perhaps more "plain" way is to use an alternate set of +delimiters that doesn't require a ">" to be escaped. 
As of perl5.5.660, +doubled angle brackets ("<<" and ">>") may be used I<if and only if there +is whitespace immediately following the opening delimiter and immediately +preceding the closing delimiter!> For example, the following will do the +trick: + + C<< $a <=> $b >> + +In fact, you can use as many repeated angle-brackets as you like so +long as you have the same number of them in the opening and closing +delimiters, and make sure that whitespace immediately follows the last +'<' of the opening delimiter, and immediately precedes the first '>' of +the closing delimiter. So the following will also work: + + C<<< $a <=> $b >>> + C<<<< $a <=> $b >>>> + +This is currently supported by pod2text (Pod::Text), pod2man (Pod::Man), +and any other pod2xxx and Pod::Xxxx translator that uses Pod::Parser +1.093 or later. + =head2 The Intent @@ -223,7 +253,6 @@ TeX, and other markup languages, as used for online documentation. Translators exist for B<pod2man> (that's for nroff(1) and troff(1)), B<pod2text>, B<pod2html>, B<pod2latex>, and B<pod2fm>. - =head2 Embedding Pods in Perl Modules You can embed pod documentation in your Perl scripts. Start your @@ -236,7 +265,6 @@ directive. __END__ - =head1 NAME modern - I am a modern module @@ -244,7 +272,6 @@ directive. If you had not had that empty line there, then the translators wouldn't have seen it. - =head2 Common Pod Pitfalls =over 4 @@ -263,16 +290,15 @@ B<pod2man> for details). Thus, you shouldn't write things like C<the LE<lt>fooE<gt> manpage>, if you want the translated document to read sensibly. -If you don need or want total control of the text used for a -link in the output use the form LE<lt>show this text|fooE<gt> -instead. +If you need total control of the text used for a link in the output +use the form LE<lt>show this text|fooE<gt> instead. 
=item * -The script F<pod/checkpods.PL> in the Perl source distribution -provides skeletal checking for lines that look empty but aren't -B<only>, but is there as a placeholder until someone writes -Pod::Checker. The best way to check your pod is to pass it through +The B<podchecker> command is provided to check pod syntax +for errors and warnings. For example, it checks for completely +blank lines in pod segments and for unknown escape sequences. +It is still advised to pass it through one or more translators and proofread the result, or print out the result and proofread that. Some of the problems found may be bugs in the translators, which you may or may not wish to work around. @@ -281,7 +307,8 @@ the translators, which you may or may not wish to work around. =head1 SEE ALSO -L<pod2man> and L<perlsyn/"PODs: Embedded Documentation"> +L<pod2man>, L<perlsyn/"PODs: Embedded Documentation">, +L<podchecker> =head1 AUTHOR diff --git a/contrib/perl5/pod/perlport.pod b/contrib/perl5/pod/perlport.pod index c1a5483..6892b6a 100644 --- a/contrib/perl5/pod/perlport.pod +++ b/contrib/perl5/pod/perlport.pod @@ -2,33 +2,33 @@ perlport - Writing portable Perl - =head1 DESCRIPTION -Perl runs on a variety of operating systems. While most of them share -a lot in common, they also have their own very particular and unique -features. +Perl runs on numerous operating systems. While most of them share +much in common, they also have their own unique features. This document is meant to help you to find out what constitutes portable -Perl code, so that once you have made your decision to write portably, +Perl code. That way once you make a decision to write portably, you know where the lines are drawn, and you can stay within them. -There is a tradeoff between taking full advantage of B<a> particular type -of computer, and taking advantage of a full B<range> of them. 
Naturally, -as you make your range bigger (and thus more diverse), the common -denominators drop, and you are left with fewer areas of common ground in -which you can operate to accomplish a particular task. Thus, when you -begin attacking a problem, it is important to consider which part of the -tradeoff curve you want to operate under. Specifically, whether it is -important to you that the task that you are coding needs the full -generality of being portable, or if it is sufficient to just get the job -done. This is the hardest choice to be made. The rest is easy, because -Perl provides lots of choices, whichever way you want to approach your +There is a tradeoff between taking full advantage of one particular +type of computer and taking advantage of a full range of them. +Naturally, as you broaden your range and become more diverse, the +common factors drop, and you are left with an increasingly smaller +area of common ground in which you can operate to accomplish a +particular task. Thus, when you begin attacking a problem, it is +important to consider under which part of the tradeoff curve you +want to operate. Specifically, you must decide whether it is +important that the task that you are coding have the full generality +of being portable, or whether to just get the job done right now. +This is the hardest choice to be made. The rest is easy, because +Perl provides many choices, whichever way you want to approach your problem. Looking at it another way, writing portable code is usually about -willfully limiting your available choices. Naturally, it takes discipline -to do that. +willfully limiting your available choices. Naturally, it takes +discipline and sacrifice to do that. The product of portability +and convenience may be a constant. You have been warned. 
Be aware of two important points: @@ -36,24 +36,24 @@ Be aware of two important points: =item Not all Perl programs have to be portable -There is no reason why you should not use Perl as a language to glue Unix +There is no reason you should not use Perl as a language to glue Unix tools together, or to prototype a Macintosh application, or to manage the Windows registry. If it makes no sense to aim for portability for one reason or another in a given program, then don't bother. -=item The vast majority of Perl B<is> portable +=item Nearly all of Perl already I<is> portable Don't be fooled into thinking that it is hard to create portable Perl code. It isn't. Perl tries its level-best to bridge the gaps between what's available on different platforms, and all the means available to use those features. Thus almost all Perl code runs on any machine -without modification. But there I<are> some significant issues in +without modification. But there are some significant issues in writing portable code, and this document is entirely about those issues. =back -Here's the general rule: When you approach a task that is commonly done -using a whole range of platforms, think in terms of writing portable +Here's the general rule: When you approach a task commonly done +using a whole range of platforms, think about writing portable code. That way, you don't sacrifice much by way of the implementation choices you can avail yourself of, and at the same time you can give your users lots of platform choices. On the other hand, when you have to @@ -61,24 +61,21 @@ take advantage of some unique feature of a particular platform, as is often the case with systems programming (whether for Unix, Windows, S<Mac OS>, VMS, etc.), consider writing platform-specific code. -When the code will run on only two or three operating systems, then you -may only need to consider the differences of those particular systems. 
-The important thing is to decide where the code will run, and to be +When the code will run on only two or three operating systems, you +may need to consider only the differences of those particular systems. +The important thing is to decide where the code will run and to be deliberate in your decision. The material below is separated into three main sections: main issues of portability (L<"ISSUES">, platform-specific issues (L<"PLATFORMS">, and -builtin perl functions that behave differently on various ports +built-in perl functions that behave differently on various ports (L<"FUNCTION IMPLEMENTATIONS">. This information should not be considered complete; it includes possibly transient information about idiosyncrasies of some of the ports, almost -all of which are in a state of constant evolution. Thus this material +all of which are in a state of constant evolution. Thus, this material should be considered a perpetual work in progress -(E<lt>IMG SRC="yellow_sign.gif" ALT="Under Construction"E<gt>). - - - +(<IMG SRC="yellow_sign.gif" ALT="Under Construction">). =head1 ISSUES @@ -86,24 +83,24 @@ should be considered a perpetual work in progress In most operating systems, lines in files are terminated by newlines. Just what is used as a newline may vary from OS to OS. Unix -traditionally uses C<\012>, one kind of Windows I/O uses C<\015\012>, +traditionally uses C<\012>, one type of DOSish I/O uses C<\015\012>, and S<Mac OS> uses C<\015>. -Perl uses C<\n> to represent the "logical" newline, where what -is logical may depend on the platform in use. In MacPerl, C<\n> -always means C<\015>. In DOSish perls, C<\n> usually means C<\012>, but -when accessing a file in "text" mode, STDIO translates it to (or from) -C<\015\012>. - -Due to the "text" mode translation, DOSish perls have limitations -of using C<seek> and C<tell> when a file is being accessed in "text" -mode. 
Specifically, if you stick to C<seek>-ing to locations you got -from C<tell> (and no others), you are usually free to use C<seek> and -C<tell> even in "text" mode. In general, using C<seek> or C<tell> or -other file operations that count bytes instead of characters, without -considering the length of C<\n>, may be non-portable. If you use -C<binmode> on a file, however, you can usually use C<seek> and C<tell> -with arbitrary values quite safely. +Perl uses C<\n> to represent the "logical" newline, where what is +logical may depend on the platform in use. In MacPerl, C<\n> always +means C<\015>. In DOSish perls, C<\n> usually means C<\012>, but +when accessing a file in "text" mode, STDIO translates it to (or +from) C<\015\012>, depending on whether you're reading or writing. +Unix does the same thing on ttys in canonical mode. C<\015\012> +is commonly referred to as CRLF. + +Because of the "text" mode translation, DOSish perls have limitations +in using C<seek> and C<tell> on a file accessed in "text" mode. +Stick to C<seek>-ing to locations you got from C<tell> (and no +others), and you are usually free to use C<seek> and C<tell> even +in "text" mode. Using C<seek> or C<tell> or other file operations +may be non-portable. If you use C<binmode> on a file, however, you +can usually C<seek> and C<tell> with arbitrary values in safety. A common misconception in socket programming is that C<\n> eq C<\012> everywhere. When using protocols such as common Internet protocols, @@ -113,28 +110,23 @@ the logical C<\n> and C<\r> (carriage return) are not reliable. print SOCKET "Hi there, client!\r\n"; # WRONG print SOCKET "Hi there, client!\015\012"; # RIGHT -[NOTE: this does not necessarily apply to communications that are -filtered by another program or module before sending to the socket; the -the most popular EBCDIC webserver, for instance, accepts C<\r\n>, -which translates those characters, along with all other -characters in text streams, from EBCDIC to ASCII.] 
- However, using C<\015\012> (or C<\cM\cJ>, or C<\x0D\x0A>) can be tedious and unsightly, as well as confusing to those maintaining the code. As -such, the C<Socket> module supplies the Right Thing for those who want it. +such, the Socket module supplies the Right Thing for those who want it. use Socket qw(:DEFAULT :crlf); print SOCKET "Hi there, client!$CRLF" # RIGHT -When reading I<from> a socket, remember that the default input record -separator (C<$/>) is C<\n>, but code like this should recognize C<$/> as -C<\012> or C<\015\012>: +When reading from a socket, remember that the default input record +separator C<$/> is C<\n>, but robust socket code will recognize as +either C<\012> or C<\015\012> as end of line: while (<SOCKET>) { # ... } -Better: +Because both CRLF and LF end in LF, the input record separator can +be set to LF and any CR stripped later. Better to write: use Socket qw(:DEFAULT :crlf); local($/) = LF; # not needed if $/ is already \012 @@ -144,67 +136,103 @@ Better: # s/\015?\012/\n/; # same thing } -And this example is actually better than the previous one even for Unix -platforms, because now any C<\015>'s (C<\cM>'s) are stripped out +This example is preferred over the previous one--even for Unix +platforms--because now any C<\015>'s (C<\cM>'s) are stripped out (and there was much rejoicing). -An important thing to remember is that functions that return data -should translate newlines when appropriate. Often one line of code -will suffice: +Similarly, functions that return text data--such as a function that +fetches a web page--should sometimes translate newlines before +returning the data, if they've not yet been translated to the local +newline representation. A single line of code will often suffice: + + $data =~ s/\015?\012/\n/g; + return $data; + +Some of this may be confusing. Here's a handy reference to the ASCII CR +and LF characters. You can print it out and stick it in your wallet. 
+ + LF == \012 == \x0A == \cJ == ASCII 10 + CR == \015 == \x0D == \cM == ASCII 13 - $data =~ s/\015?\012/\n/g; - return $data; + | Unix | DOS | Mac | + --------------------------- + \n | LF | LF | CR | + \r | CR | CR | LF | + \n * | LF | CRLF | CR | + \r * | CR | CR | LF | + --------------------------- + * text-mode STDIO +The Unix column assumes that you are not accessing a serial line +(like a tty) in canonical mode. If you are, then CR on input becomes +"\n", and "\n" on output becomes CRLF. + +These are just the most common definitions of C<\n> and C<\r> in Perl. +There may well be others. =head2 Numbers endianness and Width Different CPUs store integers and floating point numbers in different orders (called I<endianness>) and widths (32-bit and 64-bit being the -most common). This affects your programs if they attempt to transfer -numbers in binary format from a CPU architecture to another over some -channel: either 'live' via network connections or storing the numbers -to secondary storage such as a disk file. +most common today). This affects your programs when they attempt to transfer +numbers in binary format from one CPU architecture to another, +usually either "live" via network connection, or by storing the +numbers to secondary storage such as a disk file or tape. -Conflicting storage orders make utter mess out of the numbers: if a -little-endian host (Intel, Alpha) stores 0x12345678 (305419896 in +Conflicting storage orders make utter mess out of the numbers. If a +little-endian host (Intel, VAX) stores 0x12345678 (305419896 in decimal), a big-endian host (Motorola, MIPS, Sparc, PA) reads it as 0x78563412 (2018915346 in decimal). To avoid this problem in network -(socket) connections use the C<pack()> and C<unpack()> formats C<"n"> -and C<"N">, the "network" orders, they are guaranteed to be portable. +(socket) connections use the C<pack> and C<unpack> formats C<n> +and C<N>, the "network" orders. These are guaranteed to be portable. 
+ +You can explore the endianness of your platform by unpacking a +data structure packed in native format such as: + + print unpack("h*", pack("s2", 1, 2)), "\n"; + # '10002000' on e.g. Intel x86 or Alpha 21064 in little-endian mode + # '00100020' on e.g. Motorola 68040 + +If you need to distinguish between endian architectures you could use +either of the variables set like so: -Different widths can cause truncation even between platforms of equal -endianness: the platform of shorter width loses the upper parts of the + $is_big_endian = unpack("h*", pack("s", 1)) =~ /01/; + $is_litte_endian = unpack("h*", pack("s", 1)) =~ /^1/; + +Differing widths can cause truncation even between platforms of equal +endianness. The platform of shorter width loses the upper parts of the number. There is no good solution for this problem except to avoid transferring or storing raw binary numbers. -One can circumnavigate both these problems in two ways: either +One can circumnavigate both these problems in two ways. Either transfer and store numbers always in text format, instead of raw -binary, or consider using modules like C<Data::Dumper> (included in -the standard distribution as of Perl 5.005) and C<Storable>. +binary, or else consider using modules like Data::Dumper (included in +the standard distribution as of Perl 5.005) and Storable. Keeping +all data as text significantly simplifies matters. =head2 Files and Filesystems Most platforms these days structure files in a hierarchical fashion. -So, it is reasonably safe to assume that any platform supports the -notion of a "path" to uniquely identify a file on the system. Just -how that path is actually written, differs. +So, it is reasonably safe to assume that all platforms support the +notion of a "path" to uniquely identify a file on the system. How +that path is really written, though, differs considerably. 
-While they are similar, file path specifications differ between Unix, -Windows, S<Mac OS>, OS/2, VMS, VOS, S<RISC OS> and probably others. -Unix, for example, is one of the few OSes that has the idea of a single -root directory. +Although similar, file path specifications differ between Unix, +Windows, S<Mac OS>, OS/2, VMS, VOS, S<RISC OS>, and probably others. +Unix, for example, is one of the few OSes that has the elegant idea +of a single root directory. -VMS, Windows, and OS/2 can work similarly to Unix with C</> as path -separator, or in their own idiosyncratic ways (such as having several -root directories and various "unrooted" device files such NIL: and -LPT:). +DOS, OS/2, VMS, VOS, and Windows can work similarly to Unix with C</> +as path separator, or in their own idiosyncratic ways (such as having +several root directories and various "unrooted" device files such NIL: +and LPT:). S<Mac OS> uses C<:> as a path separator instead of C</>. -The filesystem may support neither hard links (C<link()>) nor -symbolic links (C<symlink()>, C<readlink()>, C<lstat()>). +The filesystem may support neither hard links (C<link>) nor +symbolic links (C<symlink>, C<readlink>, C<lstat>). -The filesystem may not support neither access timestamp nor change +The filesystem may support neither access timestamp nor change timestamp (meaning that about the only portable timestamp is the modification timestamp), or one second granularity of any timestamps (e.g. the FAT filesystem limits the time granularity to two seconds). @@ -213,84 +241,91 @@ VOS perl can emulate Unix filenames with C</> as path separator. The native pathname characters greater-than, less-than, number-sign, and percent-sign are always accepted. -C<RISC OS> perl can emulate Unix filenames with C</> as path +S<RISC OS> perl can emulate Unix filenames with C</> as path separator, or go native and use C<.> for path separator and C<:> to -signal filing systems and disc names. +signal filesystems and disk names. 
-As with the newline problem above, there are modules that can help. The -C<File::Spec> modules provide methods to do the Right Thing on whatever -platform happens to be running the program. +If all this is intimidating, have no (well, maybe only a little) +fear. There are modules that can help. The File::Spec modules +provide methods to do the Right Thing on whatever platform happens +to be running the program. - use File::Spec; - chdir(File::Spec->updir()); # go up one directory - $file = File::Spec->catfile( - File::Spec->curdir(), 'temp', 'file.txt' - ); + use File::Spec::Functions; + chdir(updir()); # go up one directory + $file = catfile(curdir(), 'temp', 'file.txt'); # on Unix and Win32, './temp/file.txt' # on Mac OS, ':temp:file.txt' + # on VMS, '[.temp]file.txt' -File::Spec is available in the standard distribution, as of version -5.004_05. +File::Spec is available in the standard distribution as of version +5.004_05. File::Spec::Functions is only in File::Spec 0.7 and later, +and some versions of perl come with version 0.6. If File::Spec +is not updated to 0.7 or later, you must use the object-oriented +interface from File::Spec (or upgrade File::Spec). -In general, production code should not have file paths hardcoded; making -them user supplied or from a configuration file is better, keeping in mind -that file path syntax varies on different machines. +In general, production code should not have file paths hardcoded. +Making them user-supplied or read from a configuration file is +better, keeping in mind that file path syntax varies on different +machines. This is especially noticeable in scripts like Makefiles and test suites, which often assume C</> as a path separator for subdirectories. -Also of use is C<File::Basename>, from the standard distribution, which +Also of use is File::Basename from the standard distribution, which splits a pathname into pieces (base filename, full path to directory, and file suffix). 
-Even when on a single platform (if you can call UNIX a single platform), -remember not to count on the existence or the contents of +Even when on a single platform (if you can call Unix a single platform), +remember not to count on the existence or the contents of particular system-specific files or directories, like F</etc/passwd>, -F</etc/sendmail.conf>, F</etc/resolv.conf>, or even F</tmp/>. For -example, F</etc/passwd> may exist but it may not contain the encrypted -passwords because the system is using some form of enhanced security -- -or it may not contain all the accounts because the system is using NIS. +F</etc/sendmail.conf>, F</etc/resolv.conf>, or even F</tmp/>. For +example, F</etc/passwd> may exist but not contain the encrypted +passwords, because the system is using some form of enhanced security. +Or it may not contain all the accounts, because the system is using NIS. If code does need to rely on such a file, include a description of the -file and its format in the code's documentation, and make it easy for +file and its format in the code's documentation, then make it easy for the user to override the default location of the file. -Don't assume a text file will end with a newline. +Don't assume a text file will end with a newline. They should, +but people forget. Do not have two files of the same name with different case, like F<test.pl> and F<Test.pl>, as many platforms have case-insensitive filenames. Also, try not to have non-word characters (except for C<.>) in the names, and keep them to the 8.3 convention, for maximum -portability. +portability, onerous a burden though this may appear. -Likewise, if using C<AutoSplit>, try to keep the split functions to -8.3 naming and case-insensitive conventions; or, at the very least, +Likewise, when using the AutoSplit module, try to keep your functions to +8.3 naming and case-insensitive conventions; or, at the least, make it so the resulting files have a unique (case-insensitively) first 8 characters. 
-There certainly can be whitespace in filenames. Many systems (DOS, -VMS) cannot have more than one C<"."> in their filenames. - -Don't assume C<E<gt>> won't be the first character of a filename. -Always use C<E<lt>> explicitly to open a file for reading. +Whitespace in filenames is tolerated on most systems, but not all. +Many systems (DOS, VMS) cannot have more than one C<.> in their filenames. - open(FILE, "<$existing_file") or die $!; +Don't assume C<< > >> won't be the first character of a filename. +Always use C<< < >> explicitly to open a file for reading, +unless you want the user to be able to specify a pipe open. -Actually, though, if filenames might use strange characters, it is -safest to open it with C<sysopen> instead of C<open>, which is magic. + open(FILE, "< $existing_file") or die $!; +If filenames might use strange characters, it is safest to open it +with C<sysopen> instead of C<open>. C<open> is magic and can +translate characters like C<< > >>, C<< < >>, and C<|>, which may +be the wrong thing to do. (Sometimes, though, it's the right thing.) =head2 System Interaction -Not all platforms provide for the notion of a command line, necessarily. -These are usually platforms that rely on a Graphical User Interface (GUI) -for user interaction. So a program requiring command lines might not work -everywhere. But this is probably for the user of the program to deal -with. +Not all platforms provide a command line. These are usually platforms +that rely primarily on a Graphical User Interface (GUI) for user +interaction. A program requiring a command line interface might +not work everywhere. This is probably for the user of the program +to deal with, so don't stay up late worrying about it. -Some platforms can't delete or rename files that are being held open by -the system. Remember to C<close> files when you are done with them. -Don't C<unlink> or C<rename> an open file. 
Don't C<tie> to or C<open> a -file that is already tied to or opened; C<untie> or C<close> first. +Some platforms can't delete or rename files held open by the system. +Remember to C<close> files when you are done with them. Don't +C<unlink> or C<rename> an open file. Don't C<tie> or C<open> a +file already tied or opened; C<untie> or C<close> it first. Don't open the same file more than once at a time for writing, as some operating systems put mandatory locks on such files. @@ -299,7 +334,7 @@ Don't count on a specific environment variable existing in C<%ENV>. Don't count on C<%ENV> entries being case-sensitive, or even case-preserving. -Don't count on signals. +Don't count on signals or C<%SIG> for anything. Don't count on filename globbing. Use C<opendir>, C<readdir>, and C<closedir> instead. @@ -309,115 +344,123 @@ directories. Don't count on specific values of C<$!>. - =head2 Interprocess Communication (IPC) -In general, don't directly access the system in code that is meant to be -portable. That means, no C<system>, C<exec>, C<fork>, C<pipe>, C<``>, -C<qx//>, C<open> with a C<|>, nor any of the other things that makes being -a Unix perl hacker worth being. +In general, don't directly access the system in code meant to be +portable. That means, no C<system>, C<exec>, C<fork>, C<pipe>, +C<``>, C<qx//>, C<open> with a C<|>, nor any of the other things +that makes being a perl hacker worth being. Commands that launch external processes are generally supported on -most platforms (though many of them do not support any type of forking), -but the problem with using them arises from what you invoke with them. -External tools are often named differently on different platforms, often -not available in the same location, often accept different arguments, -often behave differently, and often represent their results in a -platform-dependent way. Thus you should seldom depend on them to produce -consistent results. 
+most platforms (though many of them do not support any type of +forking). The problem with using them arises from what you invoke +them on. External tools are often named differently on different +platforms, may not be available in the same location, might accept +different arguments, can behave differently, and often present their +results in a platform-dependent way. Thus, you should seldom depend +on them to produce consistent results. (Then again, if you're calling +I<netstat -a>, you probably don't expect it to run on both Unix and CP/M.) -One especially common bit of Perl code is opening a pipe to sendmail: +One especially common bit of Perl code is opening a pipe to B<sendmail>: - open(MAIL, '|/usr/lib/sendmail -t') or die $!; + open(MAIL, '|/usr/lib/sendmail -t') + or die "cannot fork sendmail: $!"; This is fine for systems programming when sendmail is known to be available. But it is not fine for many non-Unix systems, and even some Unix systems that may not have sendmail installed. If a portable -solution is needed, see the C<Mail::Send> and C<Mail::Mailer> modules -in the C<MailTools> distribution. C<Mail::Mailer> provides several -mailing methods, including mail, sendmail, and direct SMTP -(via C<Net::SMTP>) if a mail transfer agent is not available. +solution is needed, see the various distributions on CPAN that deal +with it. Mail::Mailer and Mail::Send in the MailTools distribution are +commonly used, and provide several mailing methods, including mail, +sendmail, and direct SMTP (via Net::SMTP) if a mail transfer agent is +not available. Mail::Sendmail is a standalone module that provides +simple, platform-independent mailing. + +The Unix System V IPC (C<msg*(), sem*(), shm*()>) is not available +even on all Unix platforms. The rule of thumb for portable code is: Do it all in portable Perl, or use a module (that may internally implement it with platform-specific code, but expose a common interface). 
-The UNIX System V IPC (C<msg*(), sem*(), shm*()>) is not available -even in all UNIX platforms. - - =head2 External Subroutines (XS) -XS code, in general, can be made to work with any platform; but dependent +XS code can usually be made to work with any platform, but dependent libraries, header files, etc., might not be readily available or portable, or the XS code itself might be platform-specific, just as Perl code might be. If the libraries and headers are portable, then it is normally reasonable to make sure the XS code is portable, too. -There is a different kind of portability issue with writing XS -code: availability of a C compiler on the end-user's system. C brings -with it its own portability issues, and writing XS code will expose you to -some of those. Writing purely in perl is a comparatively easier way to +A different type of portability issue arises when writing XS code: +availability of a C compiler on the end-user's system. C brings +with it its own portability issues, and writing XS code will expose +you to some of those. Writing purely in Perl is an easier way to achieve portability. - =head2 Standard Modules In general, the standard modules work across platforms. Notable -exceptions are C<CPAN.pm> (which currently makes connections to external +exceptions are the CPAN module (which currently makes connections to external programs that may not be available), platform-specific modules (like -C<ExtUtils::MM_VMS>), and DBM modules. +ExtUtils::MM_VMS), and DBM modules. -There is no one DBM module that is available on all platforms. -C<SDBM_File> and the others are generally available on all Unix and DOSish -ports, but not in MacPerl, where only C<NBDM_File> and C<DB_File> are +There is no one DBM module available on all platforms. +SDBM_File and the others are generally available on all Unix and DOSish +ports, but not in MacPerl, where only NBDM_File and DB_File are available. 
The good news is that at least some DBM module should be available, and -C<AnyDBM_File> will use whichever module it can find. Of course, then -the code needs to be fairly strict, dropping to the lowest common -denominator (e.g., not exceeding 1K for each record). - +AnyDBM_File will use whichever module it can find. Of course, then +the code needs to be fairly strict, dropping to the greatest common +factor (e.g., not exceeding 1K for each record), so that it will +work with any DBM module. See L<AnyDBM_File> for more details. =head2 Time and Date The system's notion of time of day and calendar date is controlled in -widely different ways. Don't assume the timezone is stored in C<$ENV{TZ}>, +widely different ways. Don't assume the timezone is stored in C<$ENV{TZ}>, and even if it is, don't assume that you can control the timezone through that variable. Don't assume that the epoch starts at 00:00:00, January 1, 1970, -because that is OS-specific. Better to store a date in an unambiguous -representation. The ISO 8601 standard defines YYYY-MM-DD as the date -format. A text representation (like C<1 Jan 1970>) can be easily -converted into an OS-specific value using a module like -C<Date::Parse>. An array of values, such as those returned by +because that is OS- and implementation-specific. It is better to store a date +in an unambiguous representation. The ISO-8601 standard defines +"YYYY-MM-DD" as the date format. A text representation (like "1987-12-18") +can be easily converted into an OS-specific value using a module like +Date::Parse. An array of values, such as those returned by C<localtime>, can be converted to an OS-specific representation using -C<Time::Local>. +Time::Local. +When calculating specific times, such as for tests in time or date modules, +it may be appropriate to calculate an offset for the epoch. 
+ + require Time::Local; + $offset = Time::Local::timegm(0, 0, 0, 1, 0, 70); + +The value for C<$offset> in Unix will be C<0>, but in Mac OS will be +some large number. C<$offset> can then be added to a Unix time value +to get what should be the proper value on any system. =head2 Character sets and character encoding -Assume very little about character sets. Do not assume anything about -the numerical values (C<ord()>, C<chr()>) of characters. Do not +Assume little about character sets. Assume nothing about +numerical values (C<ord>, C<chr>) of characters. Do not assume that the alphabetic characters are encoded contiguously (in -numerical sense). Do not assume anything about the ordering of the +the numeric sense). Do not assume anything about the ordering of the characters. The lowercase letters may come before or after the -uppercase letters, the lowercase and uppercase may be interlaced so -that both 'a' and 'A' come before the 'b', the accented and other +uppercase letters; the lowercase and uppercase may be interlaced so +that both `a' and `A' come before `b'; the accented and other international characters may be interlaced so that E<auml> comes -before the 'b'. - +before `b'. =head2 Internationalisation -If you may assume POSIX (a rather large assumption, that in practice -means UNIX), you may read more about the POSIX locale system from -L<perllocale>. The locale system at least attempts to make things a -little bit more portable, or at least more convenient and -native-friendly for non-English users. The system affects character -sets and encoding, and date and time formatting, among other things. - +If you may assume POSIX (a rather large assumption), you may read +more about the POSIX locale system from L<perllocale>. The locale +system at least attempts to make things a little bit more portable, +or at least more convenient and native-friendly for non-English +users. 
The system affects character sets and encoding, and date +and time formatting--amongst other things. =head2 System Resources @@ -434,31 +477,40 @@ of avoiding wasteful constructs such as: while (<FILE>) {$file .= $_} # sometimes bad $file = join('', <FILE>); # better -The last two may appear unintuitive to most people. The first of those -two constructs repeatedly grows a string, while the second allocates a -large chunk of memory in one go. On some systems, the latter is more -efficient that the former. - +The last two constructs may appear unintuitive to most people. The +first repeatedly grows a string, whereas the second allocates a +large chunk of memory in one go. On some systems, the second is +more efficient that the first. =head2 Security -Most multi-user platforms provide basic levels of security that is usually -felt at the file-system level. Other platforms usually don't -(unfortunately). Thus the notion of user id, or "home" directory, or even -the state of being logged-in, may be unrecognizable on many platforms. If -you write programs that are security conscious, it is usually best to know -what type of system you will be operating under, and write code explicitly -for that platform (or class of platforms). - +Most multi-user platforms provide basic levels of security, usually +implemented at the filesystem level. Some, however, do +not--unfortunately. Thus the notion of user id, or "home" directory, +or even the state of being logged-in, may be unrecognizable on many +platforms. If you write programs that are security-conscious, it +is usually best to know what type of system you will be running +under so that you can write code explicitly for that platform (or +class of platforms). =head2 Style For those times when it is necessary to have platform-specific code, consider keeping the platform-specific code in one place, making porting -to other platforms easier. Use the C<Config> module and the special +to other platforms easier. 
Use the Config module and the special variable C<$^O> to differentiate platforms, as described in L<"PLATFORMS">. +Be careful in the tests you supply with your module or programs. +Module code may be fully portable, but its tests might not be. This +often happens when tests spawn off other processes or call external +programs to aid in the testing, or when (as noted above) the tests +assume certain things about the filesystem and paths. Be careful +not to depend on a specific output style for errors, such as when +checking C<$!> after a system call. Some platforms expect a certain +output format, and perl on those platforms may have been adjusted +accordingly. Most specifically, don't anchor a regex when testing +an error value. =head1 CPAN Testers @@ -469,58 +521,80 @@ this platform), or UNKNOWN (unknown), along with any relevant notations. The purpose of the testing is twofold: one, to help developers fix any problems in their code that crop up because of lack of testing on other -platforms; two, to provide users with information about whether or not +platforms; two, to provide users with information about whether a given module works on a given platform. =over 4 =item Mailing list: cpan-testers@perl.org -=item Testing results: C<http://www.connect.net/gbarr/cpan-test/> +=item Testing results: http://testers.cpan.org/ =back - =head1 PLATFORMS As of version 5.002, Perl is built with a C<$^O> variable that indicates the operating system it was built on. This was implemented -to help speed up code that would otherwise have to C<use Config;> and -use the value of C<$Config{'osname'}>. Of course, to get +to help speed up code that would otherwise have to C<use Config> +and use the value of C<$Config{osname}>. Of course, to get more detailed information about the system, looking into C<%Config> is certainly recommended. +C<%Config> cannot always be trusted, however, because it was built +at compile time. 
If perl was built in one place, then transferred +elsewhere, some values may be wrong. The values may even have been +edited after the fact. + =head2 Unix Perl works on a bewildering variety of Unix and Unix-like platforms (see e.g. most of the files in the F<hints/> directory in the source code kit). On most of these systems, the value of C<$^O> (hence C<$Config{'osname'}>, -too) is determined by lowercasing and stripping punctuation from the first -field of the string returned by typing C<uname -a> (or a similar command) -at the shell prompt. Here, for example, are a few of the more popular -Unix flavors: - - uname $^O $Config{'archname'} - ------------------------------------------- - AIX aix aix - FreeBSD freebsd freebsd-i386 - Linux linux i386-linux - HP-UX hpux PA-RISC1.1 - IRIX irix irix - OSF1 dec_osf alpha-dec_osf - SunOS solaris sun4-solaris - SunOS solaris i86pc-solaris - SunOS4 sunos sun4-sunos - -Note that because the C<$Config{'archname'}> may depend on the hardware -architecture it may vary quite a lot, much more than the C<$^O>. +too) is determined either by lowercasing and stripping punctuation from the +first field of the string returned by typing C<uname -a> (or a similar command) +at the shell prompt or by testing the file system for the presence of +uniquely named files such as a kernel or header file. 
Here, for example, +are a few of the more popular Unix flavors: + + uname $^O $Config{'archname'} + -------------------------------------------- + AIX aix aix + BSD/OS bsdos i386-bsdos + dgux dgux AViiON-dgux + DYNIX/ptx dynixptx i386-dynixptx + FreeBSD freebsd freebsd-i386 + Linux linux arm-linux + Linux linux i386-linux + Linux linux i586-linux + Linux linux ppc-linux + HP-UX hpux PA-RISC1.1 + IRIX irix irix + Mac OS X rhapsody rhapsody + MachTen PPC machten powerpc-machten + NeXT 3 next next-fat + NeXT 4 next OPENSTEP-Mach + openbsd openbsd i386-openbsd + OSF1 dec_osf alpha-dec_osf + reliantunix-n svr4 RM400-svr4 + SCO_SV sco_sv i386-sco_sv + SINIX-N svr4 RM400-svr4 + sn4609 unicos CRAY_C90-unicos + sn6521 unicosmk t3e-unicosmk + sn9617 unicos CRAY_J90-unicos + SunOS solaris sun4-solaris + SunOS solaris i86pc-solaris + SunOS4 sunos sun4-sunos + +Because the value of C<$Config{archname}> may depend on the +hardware architecture, it can vary more than the value of C<$^O>. =head2 DOS and Derivatives -Perl has long been ported to PC style microcomputers running under +Perl has long been ported to Intel-style microcomputers running under systems like PC-DOS, MS-DOS, OS/2, and most Windows platforms you can bring yourself to mention (except for Windows CE, if you count that). -Users familiar with I<COMMAND.COM> and/or I<CMD.EXE> style shells should +Users familiar with I<COMMAND.COM> or I<CMD.EXE> style shells should be aware that each of these file specifications may have subtle differences: @@ -529,35 +603,39 @@ differences: $filespec2 = 'c:\foo\bar\file.txt'; $filespec3 = 'c:\\foo\\bar\\file.txt'; -System calls accept either C</> or C<\> as the path separator. However, -many command-line utilities of DOS vintage treat C</> as the option -prefix, so they may get confused by filenames containing C</>. 
Aside -from calling any external programs, C</> will work just fine, and -probably better, as it is more consistent with popular usage, and avoids -the problem of remembering what to backwhack and what not to. +System calls accept either C</> or C<\> as the path separator. +However, many command-line utilities of DOS vintage treat C</> as +the option prefix, so may get confused by filenames containing C</>. +Aside from calling any external programs, C</> will work just fine, +and probably better, as it is more consistent with popular usage, +and avoids the problem of remembering what to backwhack and what +not to. -The DOS FAT filesystem can only accommodate "8.3" style filenames. Under -the "case insensitive, but case preserving" HPFS (OS/2) and NTFS (NT) +The DOS FAT filesystem can accommodate only "8.3" style filenames. Under +the "case-insensitive, but case-preserving" HPFS (OS/2) and NTFS (NT) filesystems you may have to be careful about case returned with functions like C<readdir> or used with functions like C<open> or C<opendir>. -DOS also treats several filenames as special, such as AUX, PRN, NUL, CON, -COM1, LPT1, LPT2 etc. Unfortunately these filenames won't even work -if you include an explicit directory prefix, in some cases. It is best -to avoid such filenames, if you want your code to be portable to DOS -and its derivatives. +DOS also treats several filenames as special, such as AUX, PRN, +NUL, CON, COM1, LPT1, LPT2, etc. Unfortunately, sometimes these +filenames won't even work if you include an explicit directory +prefix. It is best to avoid such filenames, if you want your code +to be portable to DOS and its derivatives. It's hard to know what +these all are, unfortunately. Users of these operating systems may also wish to make use of -scripts such as I<pl2bat.bat> or I<pl2cmd> as appropriate to +scripts such as I<pl2bat.bat> or I<pl2cmd> to put wrappers around your scripts. 
Newline (C<\n>) is translated as C<\015\012> by STDIO when reading from -and writing to files. C<binmode(FILEHANDLE)> will keep C<\n> translated -as C<\012> for that filehandle. Since it is a noop on other systems, -C<binmode> should be used for cross-platform code that deals with binary -data. - -The C<$^O> variable and the C<$Config{'archname'}> values for various +and writing to files (see L<"Newlines">). C<binmode(FILEHANDLE)> +will keep C<\n> translated as C<\012> for that filehandle. Since it is a +no-op on other systems, C<binmode> should be used for cross-platform code +that deals with binary data. That's assuming you realize in advance +that your data is in binary. General-purpose programs should +often assume nothing about their data. + +The C<$^O> variable and the C<$Config{archname}> values for various DOSish perls are as follows: OS $^O $Config{'archname'} @@ -566,34 +644,61 @@ DOSish perls are as follows: PC-DOS dos OS/2 os2 Windows 95 MSWin32 MSWin32-x86 + Windows 98 MSWin32 MSWin32-x86 Windows NT MSWin32 MSWin32-x86 - Windows NT MSWin32 MSWin32-alpha + Windows NT MSWin32 MSWin32-ALPHA Windows NT MSWin32 MSWin32-ppc + Cygwin cygwin Also see: =over 4 -=item The djgpp environment for DOS, C<http://www.delorie.com/djgpp/> +=item * -=item The EMX environment for DOS, OS/2, etc. C<emx@iaehv.nl>, -C<http://www.leo.org/pub/comp/os/os2/leo/gnu/emx+gcc/index.html> or -C<ftp://hobbes.nmsu.edu/pub/os2/dev/emx> +The djgpp environment for DOS, http://www.delorie.com/djgpp/ +and L<perldos>. -=item Build instructions for Win32, L<perlwin32>. +=item * -=item The ActiveState Pages, C<http://www.activestate.com/> +The EMX environment for DOS, OS/2, etc. emx@iaehv.nl, +http://www.leo.org/pub/comp/os/os2/leo/gnu/emx+gcc/index.html or +ftp://hobbes.nmsu.edu/pub/os2/dev/emx. Also L<perlos2>. -=back +=item * +Build instructions for Win32 in L<perlwin32>, or under the Cygnus environment +in L<perlcygwin>. + +=item * + +The C<Win32::*> modules in L<Win32>. 
+ +=item * + +The ActiveState Pages, http://www.activestate.com/ + +=item * + +The Cygwin environment for Win32; F<README.cygwin> (installed +as L<perlcygwin>), http://sourceware.cygnus.com/cygwin/ + +=item * + +The U/WIN environment for Win32, +http://www.research.att.com/sw/tools/uwin/ + +=item Build instructions for OS/2, L<perlos2> + + +=back =head2 S<Mac OS> Any module requiring XS compilation is right out for most people, because MacPerl is built using non-free (and non-cheap!) compilers. Some XS modules that can work with MacPerl are built and distributed in binary -form on CPAN. See I<MacPerl: Power and Ease> and L<"CPAN Testers"> -for more details. +form on CPAN. Directories are specified as: @@ -604,12 +709,12 @@ Directories are specified as: :file for relative pathnames file for relative pathnames -Files in a directory are stored in alphabetical order. Filenames are -limited to 31 characters, and may include any character except C<:>, -which is reserved as a path separator. +Files are stored in the directory in alphabetical order. Filenames are +limited to 31 characters, and may include any character except for +null and C<:>, which is reserved as the path separator. Instead of C<flock>, see C<FSpSetFLock> and C<FSpRstFLock> in the -C<Mac::Files> module, or C<chmod(0444, ...)> and C<chmod(0666, ...)>. +Mac::Files module, or C<chmod(0444, ...)> and C<chmod(0666, ...)>. In the MacPerl application, you can't run a program from the command line; programs that expect C<@ARGV> to be populated can be edited with something @@ -620,13 +725,13 @@ line arguments. @ARGV = split /\s+/, MacPerl::Ask('Arguments?'); } -A MacPerl script saved as a droplet will populate C<@ARGV> with the full +A MacPerl script saved as a "droplet" will populate C<@ARGV> with the full pathnames of the files dropped onto the script. -Mac users can use programs on a kind of command line under MPW (Macintosh -Programmer's Workshop, a free development environment from Apple).
-MacPerl was first introduced as an MPW tool, and MPW can be used like a -shell: +Mac users can run programs under a type of command line interface +under MPW (Macintosh Programmer's Workshop, a free development +environment from Apple). MacPerl was first introduced as an MPW +tool, and MPW can be used like a shell: perl myscript.plx some arguments @@ -644,25 +749,37 @@ the application or MPW tool version is running, check: $is_ppc = $MacPerl::Architecture eq 'MacPPC'; $is_68k = $MacPerl::Architecture eq 'Mac68K'; -S<Mac OS X>, to be based on NeXT's OpenStep OS, will (in theory) be able -to run MacPerl natively, but Unix perl will also run natively under the -built-in Unix environment. +S<Mac OS X> and S<Mac OS X Server>, based on NeXT's OpenStep OS, will +(in theory) be able to run MacPerl natively, under the "Classic" +environment. The new "Cocoa" environment (formerly called the "Yellow Box") +may run a slightly modified version of MacPerl, using the Carbon interfaces. + +S<Mac OS X Server> and its Open Source version, Darwin, both run Unix +perl natively (with a few patches). Full support for these +is slated for perl 5.6. Also see: =over 4 -=item The MacPerl Pages, C<http://www.ptf.com/macperl/>. +=item * -=item The MacPerl mailing list, C<mac-perl-request@iis.ee.ethz.ch>. +The MacPerl Pages, http://www.macperl.com/ . -=back +=item * + +The MacPerl mailing lists, http://www.macperl.org/ . + +=item * +MacPerl Module Porters, http://pudge.net/mmp/ . + +=back =head2 VMS -Perl on VMS is discussed in F<vms/perlvms.pod> in the perl distribution. -Note that perl on VMS can accept either VMS- or Unix-style file +Perl on VMS is discussed in L<perlvms> in the perl distribution. +Perl on VMS can accept either VMS- or Unix-style file specifications as in either of the following: $ perl -ne "print if /perl_setup/i" SYS$LOGIN:LOGIN.COM @@ -680,7 +797,7 @@ For example: $ perl -e "print ""Hello, world.\n""" Hello, world. 
-There are a number of ways to wrap your perl scripts in DCL .COM files if +There are several ways to wrap your perl scripts in DCL F<.COM> files, if you are so inclined. For example: $ write sys$output "Hello from DCL!" @@ -696,16 +813,16 @@ you are so inclined. For example: $ endif Do take care with C<$ ASSIGN/nolog/user SYS$COMMAND: SYS$INPUT> if your -perl-in-DCL script expects to do things like C<$read = E<lt>STDINE<gt>;>. +perl-in-DCL script expects to do things like C<< $read = <STDIN>; >>. Filenames are in the format "name.extension;version". The maximum length for filenames is 39 characters, and the maximum length for extensions is also 39 characters. Version is a number from 1 to 32767. Valid characters are C</[A-Z0-9$_-]/>. -VMS' RMS filesystem is case insensitive and does not preserve case. +VMS's RMS filesystem is case-insensitive and does not preserve case. C<readdir> returns lowercased filenames, but specifying a file for -opening remains case insensitive. Files without extensions have a +opening remains case-insensitive. Files without extensions have a trailing period on them, so doing a C<readdir> with a file named F<A.;5> will return F<a.> (though that file could be opened with C<open(FH, 'A')>). @@ -717,15 +834,15 @@ C<PERL_ROOT:[LIB.2.3.4.5.6.7.8.9]> is not. F<Makefile.PL> authors might have to take this into account, but at least they can refer to the former as C</PERL_ROOT/lib/2/3/4/5/6/7/8/>. -The C<VMS::Filespec> module, which gets installed as part of the build +The VMS::Filespec module, which gets installed as part of the build process on VMS, is a pure Perl module that can easily be installed on non-VMS platforms and can be helpful for conversions to and from RMS native formats. -What C<\n> represents depends on the type of file that is open. It could -be C<\015>, C<\012>, C<\015\012>, or nothing. Reading from a file -translates newlines to C<\012>, unless C<binmode> was executed on that -handle, just like DOSish perls. 
+What C<\n> represents depends on the type of file opened. It could +be C<\015>, C<\012>, C<\015\012>, or nothing. The VMS::Stdio module +provides access to the special fopen() requirements of files with unusual +attributes on VMS. TCP/IP stacks are optional on VMS, so socket routines might not be implemented. UDP sockets may not be supported. @@ -736,31 +853,43 @@ you can examine the content of the C<@INC> array like so: if (grep(/VMS_AXP/, @INC)) { print "I'm on Alpha!\n"; + } elsif (grep(/VMS_VAX/, @INC)) { print "I'm on VAX!\n"; + } else { print "I'm not so sure about where $^O is...\n"; } +On VMS, perl determines the UTC offset from the C<SYS$TIMEZONE_DIFFERENTIAL> +logical name. Although the VMS epoch began at 17-NOV-1858 00:00:00.00, +calls to C<localtime> are adjusted to count offsets from +01-JAN-1970 00:00:00.00, just like Unix. + Also see: =over 4 -=item L<perlvms.pod> +=item * -=item vmsperl list, C<vmsperl-request@newman.upenn.edu> +F<README.vms> (installed as L<README_vms>), L<perlvms> -Put words C<SUBSCRIBE VMSPERL> in message body. +=item * -=item vmsperl on the web, C<http://www.sidhe.org/vmsperl/index.html> +vmsperl list, majordomo@perl.org -=back +(Put the words C<subscribe vmsperl> in message body.) + +=item * + +vmsperl on the web, http://www.sidhe.org/vmsperl/index.html +=back =head2 VOS Perl on VOS is discussed in F<README.vos> in the perl distribution. 
-Note that perl on VOS can accept either VOS- or Unix-style file +Perl on VOS can accept either VOS- or Unix-style file specifications as in either of the following: $ perl -ne "print if /perl_setup/i" >system>notices @@ -770,11 +899,12 @@ or even a mixture of both as in: $ perl -ne "print if /perl_setup/i" >system/notices -Note that even though VOS allows the slash character to appear in object +Even though VOS allows the slash character to appear in object names, because the VOS port of Perl interprets it as a pathname delimiting character, VOS files, directories, or links whose names contain a slash character cannot be processed. Such files must be -renamed before they can be processed by Perl. +renamed before they can be processed by Perl. Note that VOS limits +file names to 32 or fewer characters. The following C functions are unimplemented on VOS, and any attempt by Perl to use them will result in a fatal error message and an immediate @@ -785,9 +915,9 @@ ftp.stratus.com. The value of C<$^O> on VOS is "VOS". To determine the architecture that you are running on without resorting to loading all of C<%Config> you -can examine the content of the C<@INC> array like so: +can examine the content of the @INC array like so: - if (grep(/VOS/, @INC)) { + if ($^O =~ /VOS/) { print "I'm on a Stratus box!\n"; } else { print "I'm not on a Stratus box!\n"; @@ -796,40 +926,50 @@ can examine the content of the C<@INC> array like so: if (grep(/860/, @INC)) { print "This box is a Stratus XA/R!\n"; + } elsif (grep(/7100/, @INC)) { - print "This box is a Stratus HP 7100 or 8000!\n"; + print "This box is a Stratus HP 7100 or 8xxx!\n"; + } elsif (grep(/8000/, @INC)) { - print "This box is a Stratus HP 8000!\n"; + print "This box is a Stratus HP 8xxx!\n"; + } else { - print "This box is a Stratus 68K...\n"; + print "This box is a Stratus 68K!\n"; } Also see: =over 4 -=item L<README.vos> +=item * -=item VOS mailing list +F<README.vos> + +=item * + +The VOS mailing list. 
There is no specific mailing list for Perl on VOS. You can post comments to the comp.sys.stratus newsgroup, or subscribe to the general Stratus mailing list. Send a letter with "Subscribe Info-Stratus" in the message body to majordomo@list.stratagy.com. -=item VOS Perl on the web at C<http://ftp.stratus.com/pub/vos/vos.html> +=item * -=back +VOS Perl on the web at http://ftp.stratus.com/pub/vos/vos.html +=back =head2 EBCDIC Platforms Recent versions of Perl have been ported to platforms such as OS/400 on -AS/400 minicomputers as well as OS/390 & VM/ESA for IBM Mainframes. Such -computers use EBCDIC character sets internally (usually Character Code -Set ID 00819 for OS/400 and IBM-1047 for OS/390 & VM/ESA). Note that on -the mainframe perl currently works under the "Unix system services -for OS/390" (formerly known as OpenEdition) and VM/ESA OpenEdition. +AS/400 minicomputers as well as OS/390, VM/ESA, and BS2000 for S/390 +Mainframes. Such computers use EBCDIC character sets internally (usually +Character Code Set ID 0037 for OS/400 and either 1047 or POSIX-BC for S/390 +systems). On the mainframe perl currently works under the "Unix system +services for OS/390" (formerly known as OpenEdition), VM/ESA OpenEdition, or +the BS2000 POSIX-BC system (BS2000 is supported in perl 5.6 and greater). +See L<perlos390> for details. As of R2.5 of USS for OS/390 and Version 2.3 of VM/ESA these Unix sub-systems do not support the C<#!> shebang trick for script invocation. @@ -843,22 +983,42 @@ similar to the following simple script: print "Hello from perl!\n"; +OS/390 will support the C<#!> shebang trick in release 2.8 and beyond. +Calls to C<system> and backticks can use POSIX shell syntax on all +S/390 systems.
+ +On the AS/400, if PERL5 is in your library list, you may need +to wrap your perl scripts in a CL procedure to invoke them like so: + + BEGIN + CALL PGM(PERL5/PERL) PARM('/QOpenSys/hello.pl') + ENDPGM + +This will invoke the perl script F<hello.pl> in the root of the +QOpenSys file system. On the AS/400 calls to C<system> or backticks +must use CL syntax. + On these platforms, bear in mind that the EBCDIC character set may have an effect on what happens with some perl functions (such as C<chr>, C<pack>, C<print>, C<printf>, C<ord>, C<sort>, C<sprintf>, C<unpack>), as well as bit-fiddling with ASCII constants using operators like C<^>, C<&> and C<|>, not to mention dealing with socket interfaces to ASCII computers -(see L<Newlines>). +(see L<"Newlines">). -Fortunately, most web servers for the mainframe will correctly translate -the C<\n> in the following statement to its ASCII equivalent (note that -C<\r> is the same under both Unix and OS/390 & VM/ESA): +Fortunately, most web servers for the mainframe will correctly +translate the C<\n> in the following statement to its ASCII equivalent +(C<\r> is the same under both Unix and OS/390 & VM/ESA): print "Content-type: text/html\r\n\r\n"; -The value of C<$^O> on OS/390 is "os390". +The values of C<$^O> on some of these platforms include: -The value of C<$^O> on VM/ESA is "vmesa".
+ uname $^O $Config{'archname'} + -------------------------------------------- + OS/390 os390 os390 + OS400 os400 os400 + POSIX-BC posix-bc BS2000-posix-bc + VM/ESA vmesa vmesa Some simple tricks for determining if you are running on an EBCDIC platform could include any of the following (perhaps all): @@ -869,7 +1029,7 @@ platform could include any of the following (perhaps all): if (chr(169) eq 'z') { print "EBCDIC may be spoken here!\n"; } -Note that one thing you may not want to rely on is the EBCDIC encoding +One thing you may not want to rely on is the EBCDIC encoding of punctuation characters since these may differ from code page to code page (and once your module or script is rumoured to work with EBCDIC, folks will want it to work with all EBCDIC character sets). @@ -878,33 +1038,42 @@ Also see: =over 4 -=item perl-mvs list +=item * + +* + +L<perlos390>, F<README.os390>, F<README.posix-bc>, F<README.vmesa> + +=item * The perl-mvs@perl.org list is for discussion of porting issues as well as general usage issues for all EBCDIC Perls. Send a message body of "subscribe perl-mvs" to majordomo@perl.org. -=item AS/400 Perl information at C<http://as400.rochester.ibm.com/> +=item * -=back +AS/400 Perl information at +http://as400.rochester.ibm.com/ +as well as on CPAN in the F<ports/> directory. +=back =head2 Acorn RISC OS -As Acorns use ASCII with newlines (C<\n>) in text files as C<\012> like -Unix and Unix filename emulation is turned on by default, it is quite -likely that most simple scripts will work "out of the box". The native -filing system is modular, and individual filing systems are free to be +Because Acorns use ASCII with newlines (C<\n>) in text files as C<\012> like +Unix, and because Unix filename emulation is turned on by default, +most simple scripts will probably work "out of the box". The native +filesystem is modular, and individual filesystems are free to be case-sensitive or insensitive, and are usually case-preserving.
Some -native filing systems have name length limits which file and directory -names are silently truncated to fit - scripts should be aware that the -standard disc filing system currently has a name length limit of B<10> -characters, with up to 77 items in a directory, but other filing systems +native filesystems have name length limits, which file and directory +names are silently truncated to fit. Scripts should be aware that the +standard filesystem currently has a name length limit of B<10> +characters, with up to 77 items in a directory, but other filesystems may not impose such limitations. Native filenames are of the form - Filesystem#Special_Field::DiscName.$.Directory.Directory.File + Filesystem#Special_Field::DiskName.$.Directory.Directory.File where @@ -919,27 +1088,27 @@ where The default filename translation is roughly C<tr|/.|./|;> -Note that C<"ADFS::HardDisc.$.File" ne 'ADFS::HardDisc.$.File'> and that +Note that C<"ADFS::HardDisk.$.File" ne 'ADFS::HardDisk.$.File'> and that the second stage of C<$> interpolation in regular expressions will fall foul of the C<$.> if scripts are not careful. Logical paths specified by system variables containing comma-separated -search lists are also allowed, hence C<System:Modules> is a valid +search lists are also allowed; hence C<System:Modules> is a valid filename, and the filesystem will prefix C<Modules> with each section of -C<System$Path> until a name is made that points to an object on disc. -Writing to a new file C<System:Modules> would only be allowed if +C<System$Path> until a name is made that points to an object on disk. +Writing to a new file C<System:Modules> would be allowed only if C<System$Path> contains a single item list. The filesystem will also expand system variables in filenames if enclosed in angle brackets, so -C<E<lt>System$DirE<gt>.Modules> would look for the file +C<< <System$Dir>.Modules >> would look for the file S<C<$ENV{'System$Dir'} . 'Modules'>>. 
The obvious implication of this is -that B<fully qualified filenames can start with C<E<lt>E<gt>>> and should +that B<fully qualified filenames can start with C<< <> >>> and should be protected when C<open> is used for input. Because C<.> was in use as a directory separator and filenames could not be assumed to be unique after 10 characters, Acorn implemented the C compiler to strip the trailing C<.c> C<.h> C<.s> and C<.o> suffix from filenames specified in source code and store the respective files in -subdirectories named after the suffix. Hence files are translated: +subdirectories named after the suffix. Hence files are translated: foo.h h.foo C:foo.h C:h.foo (logical path variable) @@ -949,100 +1118,120 @@ subdirectories named after the suffix. Hence files are translated: 11charname_.c c.11charname (assuming filesystem truncates at 10) The Unix emulation library's translation of filenames to native assumes -that this sort of translation is required, and allows a user defined list -of known suffixes which it will transpose in this fashion. This may -appear transparent, but consider that with these rules C<foo/bar/baz.h> +that this sort of translation is required, and it allows a user-defined list +of known suffixes that it will transpose in this fashion. This may +seem transparent, but consider that with these rules C<foo/bar/baz.h> and C<foo/bar/h/baz> both map to C<foo.bar.h.baz>, and that C<readdir> and C<glob> cannot and do not attempt to emulate the reverse mapping. Other -C<.>s in filenames are translated to C</>. +C<.>'s in filenames are translated to C</>. -As implied above the environment accessed through C<%ENV> is global, and +As implied above, the environment accessed through C<%ENV> is global, and the convention is that program specific environment variables are of the -form C<Program$Name>. Each filing system maintains a current directory, -and the current filing system's current directory is the B<global> current -directory. 
Consequently, sociable scripts don't change the current -directory but rely on full pathnames, and scripts (and Makefiles) cannot +form C<Program$Name>. Each filesystem maintains a current directory, +and the current filesystem's current directory is the B<global> current +directory. Consequently, sociable programs don't change the current +directory but rely on full pathnames, and programs (and Makefiles) cannot assume that they can spawn a child process which can change the current directory without affecting its parent (and everyone else for that matter). -As native operating system filehandles are global and currently are -allocated down from 255, with 0 being a reserved value the Unix emulation +Because native operating system filehandles are global and are currently +allocated down from 255, with 0 being a reserved value, the Unix emulation library emulates Unix filehandles. Consequently, you can't rely on passing C<STDIN>, C<STDOUT>, or C<STDERR> to your children. The desire of users to express filenames of the form -C<E<lt>Foo$DirE<gt>.Bar> on the command line unquoted causes problems, +C<< <Foo$Dir>.Bar >> on the command line unquoted causes problems, too: C<``> command output capture has to perform a guessing game. It -assumes that a string C<E<lt>[^E<lt>E<gt>]+\$[^E<lt>E<gt>]E<gt>> is a +assumes that a string C<< <[^<>]+\$[^<>]> >> is a reference to an environment variable, whereas anything else involving -C<E<lt>> or C<E<gt>> is redirection, and generally manages to be 99% +C<< < >> or C<< > >> is redirection, and generally manages to be 99% right. Of course, the problem remains that scripts cannot rely on any Unix tools being available, or that any tools found have Unix-like command line arguments. -Extensions and XS are, in theory, buildable by anyone using free tools. -In practice, many don't, as users of the Acorn platform are used to binary -distribution. 
MakeMaker does run, but no available make currently copes -with MakeMaker's makefiles; even if/when this is fixed, the lack of a -Unix-like shell can cause problems with makefile rules, especially lines -of the form C<cd sdbm && make all>, and anything using quoting. +Extensions and XS are, in theory, buildable by anyone using free +tools. In practice, many don't, as users of the Acorn platform are +used to binary distributions. MakeMaker does run, but no available +make currently copes with MakeMaker's makefiles; even if and when +this should be fixed, the lack of a Unix-like shell will cause +problems with makefile rules, especially lines of the form C<cd +sdbm && make all>, and anything using quoting. "S<RISC OS>" is the proper name for the operating system, but the value in C<$^O> is "riscos" (because we don't like shouting). -Also see: +=head2 Other perls + +Perl has been ported to many platforms that do not fit into any of +the categories listed above. Some, such as AmigaOS, Atari MiNT, +BeOS, HP MPE/iX, QNX, Plan 9, and VOS, have been well-integrated +into the standard Perl source code kit. You may need to see the +F<ports/> directory on CPAN for information, and possibly binaries, +for the likes of: aos, Atari ST, lynxos, riscos, Novell Netware, +Tandem Guardian, I<etc.> (Yes, we know that some of these OSes may +fall under the Unix category, but we are not a standards body.) + +Some approximate operating system names and their C<$^O> values +in the "OTHER" category include: + + OS $^O $Config{'archname'} + ------------------------------------------ + Amiga DOS amigaos m68k-amigos + MPE/iX mpeix PA-RISC1.1 + +See also: =over 4 -=item perl list +=item * -=back +Amiga, F<README.amiga> (installed as L<perlamiga>). +=item * -=head2 Other perls +Atari, F<README.mint> and Guido Flohr's web page +http://stud.uni-sb.de/~gufl0000/ -Perl has been ported to a variety of platforms that do not fit into any of -the above categories. 
Some, such as AmigaOS, BeOS, QNX, and Plan 9, have -been well-integrated into the standard Perl source code kit. You may need -to see the F<ports/> directory on CPAN for information, and possibly -binaries, for the likes of: aos, atari, lynxos, riscos, Tandem Guardian, -vos, I<etc.> (yes we know that some of these OSes may fall under the Unix -category, but we are not a standards body.) +=item * -See also: +Be OS, F<README.beos> -=over 4 +=item * -=item Atari, Guido Flohr's page C<http://stud.uni-sb.de/~gufl0000/> +HP 300 MPE/iX, F<README.mpeix> and Mark Bixby's web page +http://www.cccd.edu/~markb/perlix.html -=item HP 300 MPE/iX C<http://www.cccd.edu/~markb/perlix.html> +=item * -=item Novell Netware +A free perl5-based PERL.NLM for Novell Netware is available in +precompiled binary and source code form from http://www.novell.com/ +as well as from CPAN. -A free perl5-based PERL.NLM for Novell Netware is available from -C<http://www.novell.com/> +=item -=back +Plan 9, F<README.plan9> +=back =head1 FUNCTION IMPLEMENTATIONS -Listed below are functions unimplemented or implemented differently on -various platforms. Following each description will be, in parentheses, a -list of platforms that the description applies to. +Listed below are functions that are either completely unimplemented +or else have been implemented differently on various platforms. +Following each description will be, in parentheses, a list of +platforms that the description applies to. -The list may very well be incomplete, or wrong in some places. When in -doubt, consult the platform-specific README files in the Perl source -distribution, and other documentation resources for a given port. +The list may well be incomplete, or even wrong in some places. When +in doubt, consult the platform-specific README files in the Perl +source distribution, and any other documentation resources accompanying +a given port. Be aware, moreover, that even among Unix-ish systems there are variations. 
-For many functions, you can also query C<%Config>, exported by default -from C<Config.pm>. For example, to check if the platform has the C<lstat> -call, check C<$Config{'d_lstat'}>. See L<Config.pm> for a full -description of available variables. - +For many functions, you can also query C<%Config>, exported by +default from the Config module. For example, to check whether the +platform has the C<lstat> call, check C<$Config{d_lstat}>. See +L<Config> for a full description of available variables. =head2 Alphabetical Listing of Perl Functions @@ -1054,19 +1243,19 @@ description of available variables. =item -X -C<-r>, C<-w>, and C<-x> have only a very limited meaning; directories +C<-r>, C<-w>, and C<-x> have a limited meaning only; directories and applications are executable, and there are no uid/gid -considerations. C<-o> is not supported. (S<Mac OS>) +considerations. C<-o> is not supported. (S<Mac OS>) -C<-r>, C<-w>, C<-x>, and C<-o> tell whether or not file is accessible, -which may not reflect UIC-based file protections. (VMS) +C<-r>, C<-w>, C<-x>, and C<-o> tell whether the file is accessible, +which may not reflect UIC-based file protections. (VMS) C<-s> returns the size of the data fork, not the total size of data fork plus resource fork. (S<Mac OS>). C<-s> by name on an open file will return the space reserved on disk, rather than the current extent. C<-s> on an open filehandle returns the -current size. (S<RISC OS>) +current size. (S<RISC OS>) C<-R>, C<-W>, C<-X>, C<-O> are indistinguishable from C<-r>, C<-w>, C<-x>, C<-o>. (S<Mac OS>, Win32, VMS, S<RISC OS>) @@ -1082,17 +1271,23 @@ C<-d> is true if passed a device spec without an explicit directory. C<-T> and C<-B> are implemented, but might misclassify Mac text files with foreign characters; this is the case will all platforms, but may -affect S<Mac OS> often. (S<Mac OS>) +affect S<Mac OS> often. (S<Mac OS>) C<-x> (or C<-X>) determine if a file ends in one of the executable -suffixes. 
C<-S> is meaningless. (Win32) +suffixes. C<-S> is meaningless. (Win32) C<-x> (or C<-X>) determine if a file has an executable file type. (S<RISC OS>) +=item alarm SECONDS + +=item alarm + +Not implemented. (Win32) + =item binmode FILEHANDLE -Meaningless. (S<Mac OS>, S<RISC OS>) +Meaningless. (S<Mac OS>, S<RISC OS>) Reopens file and restores pointer; if function fails, underlying filehandle may be closed, or pointer may be in a different position. @@ -1103,7 +1298,7 @@ the filehandle may be flushed. (Win32) =item chmod LIST -Only limited meaning. Disabling/enabling write permission is mapped to +Only limited meaning. Disabling/enabling write permission is mapped to locking/unlocking the file. (S<Mac OS>) Only good for changing "owner" read-write access, "group", and "other" @@ -1154,6 +1349,9 @@ Not implemented. (S<Mac OS>) Implemented via Spawn. (VM/ESA) +Does not automatically flush output handles on some platforms. +(SunOS, Solaris, HP-UX) + =item fcntl FILEHANDLE,FUNCTION,SCALAR Not implemented. (Win32, VMS) @@ -1166,7 +1364,12 @@ Available only on Windows NT (not on Windows 95). (Win32) =item fork -Not implemented. (S<Mac OS>, Win32, AmigaOS, S<RISC OS>, VOS, VM/ESA) +Not implemented. (S<Mac OS>, AmigaOS, S<RISC OS>, VOS, VM/ESA) + +Emulated using multiple interpreters. See L<perlfork>. (Win32) + +Does not automatically flush output handles on some platforms. +(SunOS, Solaris, HP-UX) =item getlogin @@ -1270,11 +1473,11 @@ Not implemented. (Plan9, Win32, S<RISC OS>) =item endpwent -Not implemented. (S<Mac OS>, Win32, VM/ESA) +Not implemented. (S<Mac OS>, MPE/iX, VM/ESA, Win32) =item endgrent -Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>, VM/ESA) +Not implemented. (S<Mac OS>, MPE/iX, S<RISC OS>, VM/ESA, VMS, Win32) =item endhostent @@ -1303,14 +1506,8 @@ Not implemented. (S<Mac OS>, Plan9) Globbing built-in, but only C<*> and C<?> metacharacters are supported. (S<Mac OS>) -Features depend on external perlglob.exe or perlglob.bat. 
May be -overridden with something like File::DosGlob, which is recommended. -(Win32) - -Globbing built-in, but only C<*> and C<?> metacharacters are supported. -Globbing relies on operating system calls, which may return filenames -in any order. As most filesystems are case-insensitive, even "sorted" -filenames will not be in case-sensitive order. (S<RISC OS>) +This operator is implemented via the File::Glob extension on most +platforms. See L<File::Glob> for portability information. =item ioctl FILEHANDLE,FUNCTION,SCALAR @@ -1321,21 +1518,28 @@ in the Winsock API does. (Win32) Available only for socket handles. (S<RISC OS>) -=item kill LIST +=item kill SIGNAL, LIST Not implemented, hence not useful for taint checking. (S<Mac OS>, S<RISC OS>) -Available only for process handles returned by the C<system(1, ...)> -method of spawning a process. (Win32) +C<kill()> doesn't have the semantics of C<raise()>, i.e. it doesn't send +a signal to the identified process like it does on Unix platforms. +Instead C<kill($sig, $pid)> terminates the process identified by $pid, +and makes it exit immediately with exit status $sig. As in Unix, if +$sig is 0 and the specified process exists, it returns true without +actually terminating it. (Win32) =item link OLDFILE,NEWFILE -Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>) +Not implemented. (S<Mac OS>, MPE/iX, VMS, S<RISC OS>) Link count not updated because hard links are not quite that hard (They are sort of half-way between hard and soft links). (AmigaOS) +Hard links are implemented on Win32 (Windows NT and Windows 2000) +under NTFS only. + =item lstat FILEHANDLE =item lstat EXPR @@ -1344,7 +1548,7 @@ Link count not updated because hard links are not quite that hard Not implemented. (VMS, S<RISC OS>) -Return values may be bogus. (Win32) +Return values (especially for device and inode) may be bogus. (Win32) =item msgctl ID,CMD,ARG @@ -1360,11 +1564,14 @@ Not implemented. 
(S<Mac OS>, Win32, VMS, Plan9, S<RISC OS>, VOS) =item open FILEHANDLE -The C<|> variants are only supported if ToolServer is installed. +The C<|> variants are supported only if ToolServer is installed. (S<Mac OS>) open to C<|-> and C<-|> are unsupported. (S<Mac OS>, Win32, S<RISC OS>) +Opening a process does not automatically flush output handles on some +platforms. (SunOS, Solaris, HP-UX) + =item pipe READHANDLE,WRITEHANDLE Not implemented. (S<Mac OS>) @@ -1383,6 +1590,8 @@ Only implemented on sockets. (Win32) Only reliable on sockets. (S<RISC OS>) +Note that the C<socket FILEHANDLE> form is generally portable. + =item semctl ID,SEMNUM,CMD,ARG =item semget KEY,NSEMS,FLAGS @@ -1391,6 +1600,10 @@ Only reliable on sockets. (S<RISC OS>) Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>, VOS) +=item setgrent + +Not implemented. (MPE/iX, Win32) + =item setpgrp PID,PGRP Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>, VOS) @@ -1399,6 +1612,10 @@ Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>, VOS) Not implemented. (S<Mac OS>, Win32, VMS, S<RISC OS>, VOS) +=item setpwent + +Not implemented. (MPE/iX, Win32) + =item setsockopt SOCKET,LEVEL,OPTNAME,OPTVAL Not implemented. (S<Mac OS>, Plan9) @@ -1453,14 +1670,17 @@ OS>, OS/390, VM/ESA) Only implemented if ToolServer is installed. (S<Mac OS>) As an optimization, may not call the command shell specified in -C<$ENV{PERL5SHELL}>. C<system(1, @args)> spawns an external +C<$ENV{PERL5SHELL}>. C<system(1, @args)> spawns an external process and immediately returns its process designator, without waiting for it to terminate. Return value may be used subsequently -in C<wait> or C<waitpid>. (Win32) +in C<wait> or C<waitpid>. Failure to spawn() a subprocess is indicated +by setting $? to "255 << 8". C<$?> is set in a way compatible with +Unix (i.e. the exitstatus of the subprocess is obtained by "$? >> 8", +as described in the documentation). 
(Win32) There is no shell to process metacharacters, and the native standard is to pass a command line terminated by "\n" "\r" or "\0" to the spawned -program. Redirection such as C<E<gt> foo> is performed (if at all) by +program. Redirection such as C<< > foo >> is performed (if at all) by the run time library of the spawned program. C<system> I<list> will call the Unix emulation library's C<exec> emulation, which attempts to provide emulation of the stdin, stdout, stderr in force in the parent, providing @@ -1471,15 +1691,19 @@ of a child Unix program will exists. Mileage B<will> vary. (S<RISC OS>) Far from being POSIX compliant. Because there may be no underlying /bin/sh tries to work around the problem by forking and execing the first token in its argument string. Handles basic redirection -("E<lt>" or "E<gt>") on its own behalf. (MiNT) +("<" or ">") on its own behalf. (MiNT) + +Does not automatically flush output handles on some platforms. +(SunOS, Solaris, HP-UX) =item times Only the first entry returned is nonzero. (S<Mac OS>) -"cumulative" times will be bogus. On anything other than Windows NT, -"system" time will be bogus, and "user" time is actually the time -returned by the clock() function in the C runtime library. (Win32) +"cumulative" times will be bogus. On anything other than Windows NT +or Windows 2000, "system" time will be bogus, and "user" time is +actually the time returned by the clock() function in the C runtime +library. (Win32) Not useful. (S<RISC OS>) @@ -1502,8 +1726,8 @@ should not be held open elsewhere. (Win32) Returns undef where unavailable, as of version 5.005. -C<umask()> works but the correct permissions are only set when the file -is finally close()d. (AmigaOS) +C<umask> works but the correct permissions are set only when the file +is finally closed. (AmigaOS) =item utime LIST @@ -1532,7 +1756,45 @@ Not useful. 
(S<RISC OS>) =over 4 -=item v1.39, 11 February, 1999 +=item v1.47, 22 March 2000 + +Various cleanups from Tom Christiansen, including migration of +long platform listings from L<perl>. + +=item v1.46, 12 February 2000 + +Updates for VOS and MPE/iX. (Peter Prymmer) Other small changes. + +=item v1.45, 20 December 1999 + +Small changes from 5.005_63 distribution, more changes to EBCDIC info. + +=item v1.44, 19 July 1999 + +A bunch of updates from Peter Prymmer for C<$^O> values, +endianness, File::Spec, VMS, BS2000, OS/400. + +=item v1.43, 24 May 1999 + +Added a lot of cleaning up from Tom Christiansen. + +=item v1.42, 22 May 1999 + +Added notes about tests, sprintf/printf, and epoch offsets. + +=item v1.41, 19 May 1999 + +Lots more little changes to formatting and content. + +Added a bunch of C<$^O> and related values +for various platforms; fixed mail and web addresses, and added +and changed miscellaneous notes. (Peter Prymmer) + +=item v1.40, 11 April 1999 + +Miscellaneous changes. + +=item v1.39, 11 February 1999 Changes from Jarkko and EMX URL fixes Michael Schwern. Additional note about newlines added. @@ -1574,40 +1836,195 @@ First public release with perl5.005. 
=back +=head1 Supported Platforms + +As of early March 2000 (the Perl release 5.6.0), the following +platforms are able to build Perl from the standard source code +distribution available at http://www.perl.com/CPAN/src/index.html + + AIX + DOS DJGPP 1) + FreeBSD + HP-UX + IRIX + Linux + LynxOS + MachTen + MPE/iX + NetBSD + OpenBSD + OS/2 + QNX + Rhapsody/Darwin 2) + SCO SV + SINIX + Solaris + SVR4 + Tru64 UNIX 3) + UNICOS + UNICOS/mk + Unixware + VMS + VOS + Windows 3.1 1) + Windows 95 1) 4) + Windows 98 1) 4) + Windows NT 1) 4) + + 1) in DOS mode either the DOS or OS/2 ports can be used + 2) new in 5.6.0: the BSD/NeXT-based UNIX of Mac OS X + 3) formerly known as Digital UNIX and before that DEC OSF/1 + 4) compilers: Borland, Cygwin, Mingw32 EGCS/GCC, VC++ + +The following platforms worked for the previous major release +(5.005_03 being the latest maintenance release of that, as of early +March 2000), but we did not manage to test these in time for the 5.6.0 +release of Perl. There is a very good chance that these will work +just fine with 5.6.0. + + A/UX + BeOS + BSD/OS + DG/UX + DYNIX/ptx + DomainOS + Hurd + NextSTEP + OpenSTEP + PowerMAX + SCO ODT/OSR + SunOS + Ultrix + +The following platform worked for the previous major release (5.005_03 +being the latest maintenance release of that, as of early March 2000). +However, standardization on UTF-8 as the internal string representation +in 5.6.0 has introduced incompatibilities in this EBCDIC platform. +Support for this platform may be enabled in a future release: + + OS390 1) + + 1) Previously known as MVS, or OpenEdition MVS. + +Strongly related to the OS390 platform by also being EBCDIC-based +mainframe platforms are the following platforms: + + BS2000 + VM/ESA + +These are also not expected to work under 5.6.0 for the same reasons +as OS390. Contact the mailing list perl-mvs@perl.org for more details.
+ +MacOS (Classic, pre-X) is almost 5.6.0-ready; building from the source +does work with 5.6.0, but additional MacOS specific source code is needed +for a complete port. Contact the mailing list macperl-porters@macperl.org +for more information. + +The following platforms have been known to build Perl from source in +the past, but we haven't been able to verify their status for the +current release, either because the hardware/software platforms are +rare or because we don't have an active champion on these +platforms--or both: + + 3b1 + AmigaOS + ConvexOS + CX/UX + DC/OSx + DDE SMES + DOS EMX + Dynix + EP/IX + ESIX + FPS + GENIX + Greenhills + ISC + MachTen 68k + MiNT + MPC + NEWS-OS + Opus + Plan 9 + PowerUX + RISC/os + Stellar + SVR2 + TI1500 + TitanOS + Unisys Dynix + Unixware + +Support for the following platform is planned for a future Perl release: + + Netware + +The following platforms have their own source code distributions and +binaries available via http://www.perl.com/CPAN/ports/index.html: + + Perl release + + AS/400 5.003 + Netware 5.003_07 + Tandem Guardian 5.004 + +The following platforms have only binaries available via +http://www.perl.com/CPAN/ports/index.html : + + Perl release + + Acorn RISCOS 5.005_02 + AOS 5.002 + LynxOS 5.004_02 + +Although we do suggest that you always build your own Perl from +the source code, both for maximal configurability and for security, +in case you are in a hurry you can check +http://www.perl.com/CPAN/ports/index.html for binary distributions. + +=head1 SEE ALSO + +L<perlamiga>, L<perlcygwin>, L<perldos>, L<perlhpux>, L<perlos2>, +L<perlos390>, L<perlwin32>, L<perlvms>, and L<Win32>. 
+ =head1 AUTHORS / CONTRIBUTORS -Abigail E<lt>abigail@fnx.comE<gt>, -Charles Bailey E<lt>bailey@newman.upenn.eduE<gt>, -Graham Barr E<lt>gbarr@pobox.comE<gt>, -Tom Christiansen E<lt>tchrist@perl.comE<gt>, -Nicholas Clark E<lt>Nicholas.Clark@liverpool.ac.ukE<gt>, -Andy Dougherty E<lt>doughera@lafcol.lafayette.eduE<gt>, -Dominic Dunlop E<lt>domo@vo.luE<gt>, -Neale Ferguson E<lt>neale@mailbox.tabnsw.com.auE<gt> -Paul Green E<lt>Paul_Green@stratus.comE<gt>, -M.J.T. Guy E<lt>mjtg@cus.cam.ac.ukE<gt>, -Jarkko Hietaniemi E<lt>jhi@iki.fi<gt>, -Luther Huffman E<lt>lutherh@stratcom.comE<gt>, -Nick Ing-Simmons E<lt>nick@ni-s.u-net.comE<gt>, -Andreas J. KE<ouml>nig E<lt>koenig@kulturbox.deE<gt>, -Markus Laker E<lt>mlaker@contax.co.ukE<gt>, -Andrew M. Langmead E<lt>aml@world.std.comE<gt>, -Paul Moore E<lt>Paul.Moore@uk.origin-it.comE<gt>, -Chris Nandor E<lt>pudge@pobox.comE<gt>, -Matthias Neeracher E<lt>neeri@iis.ee.ethz.chE<gt>, -Gary Ng E<lt>71564.1743@CompuServe.COME<gt>, -Tom Phoenix E<lt>rootbeer@teleport.comE<gt>, -Peter Prymmer E<lt>pvhp@forte.comE<gt>, -Hugo van der Sanden E<lt>hv@crypt0.demon.co.ukE<gt>, -Gurusamy Sarathy E<lt>gsar@umich.eduE<gt>, -Paul J. Schinder E<lt>schinder@pobox.comE<gt>, -Michael G Schwern E<lt>schwern@pobox.comE<gt>, -Dan Sugalski E<lt>sugalskd@ous.eduE<gt>, -Nathan Torkington E<lt>gnat@frii.comE<gt>. +Abigail <abigail@fnx.com>, +Charles Bailey <bailey@newman.upenn.edu>, +Graham Barr <gbarr@pobox.com>, +Tom Christiansen <tchrist@perl.com>, +Nicholas Clark <Nicholas.Clark@liverpool.ac.uk>, +Thomas Dorner <Thomas.Dorner@start.de>, +Andy Dougherty <doughera@lafcol.lafayette.edu>, +Dominic Dunlop <domo@vo.lu>, +Neale Ferguson <neale@mailbox.tabnsw.com.au>, +David J. Fiander <davidf@mks.com>, +Paul Green <Paul_Green@stratus.com>, +M.J.T. Guy <mjtg@cus.cam.ac.uk>, +Jarkko Hietaniemi <jhi@iki.fi>, +Luther Huffman <lutherh@stratcom.com>, +Nick Ing-Simmons <nick@ni-s.u-net.com>, +Andreas J.
KE<ouml>nig <koenig@kulturbox.de>, +Markus Laker <mlaker@contax.co.uk>, +Andrew M. Langmead <aml@world.std.com>, +Larry Moore <ljmoore@freespace.net>, +Paul Moore <Paul.Moore@uk.origin-it.com>, +Chris Nandor <pudge@pobox.com>, +Matthias Neeracher <neeri@iis.ee.ethz.ch>, +Gary Ng <71564.1743@CompuServe.COM>, +Tom Phoenix <rootbeer@teleport.com>, +AndrE<eacute> Pirard <A.Pirard@ulg.ac.be>, +Peter Prymmer <pvhp@forte.com>, +Hugo van der Sanden <hv@crypt0.demon.co.uk>, +Gurusamy Sarathy <gsar@activestate.com>, +Paul J. Schinder <schinder@pobox.com>, +Michael G Schwern <schwern@pobox.com>, +Dan Sugalski <sugalskd@ous.edu>, +Nathan Torkington <gnat@frii.com>. This document is maintained by Chris Nandor -E<lt>pudge@pobox.comE<gt>. +<pudge@pobox.com>. =head1 VERSION -Version 1.39, last modified 11 February 1999 +Version 1.47, last modified 22 March 2000 diff --git a/contrib/perl5/pod/perlre.pod b/contrib/perl5/pod/perlre.pod index d4c1dee..e1f30a3 100644 --- a/contrib/perl5/pod/perlre.pod +++ b/contrib/perl5/pod/perlre.pod @@ -6,13 +6,13 @@ perlre - Perl regular expressions This page describes the syntax of regular expressions in Perl. For a description of how to I<use> regular expressions in matching -operations, plus various examples of the same, see discussion +operations, plus various examples of the same, see discussions of C<m//>, C<s///>, C<qr//> and C<??> in L<perlop/"Regexp Quote-Like Operators">. -The matching operations can have various modifiers. The modifiers +Matching operations can have various modifiers. Modifiers that relate to the interpretation of the regular expression inside -are listed below. For the modifiers that alter the way a regular expression -is used by Perl, see L<perlop/"Regexp Quote-Like Operators"> and +are listed below. Modifiers that alter the way a regular expression +is used by Perl are detailed in L<perlop/"Regexp Quote-Like Operators"> and L<perlop/"Gory details of parsing quoted constructs">. =over 4 @@ -27,20 +27,21 @@ locale. 
See L<perllocale>. =item m Treat string as multiple lines. That is, change "^" and "$" from matching -at only the very start or end of the string to the start or end of any -line anywhere within the string, +the start or end of the string to matching the start or end of any +line anywhere within the string. =item s Treat string as single line. That is, change "." to match any character -whatsoever, even a newline, which it normally would not match. +whatsoever, even a newline, which normally it would not match. -The C</s> and C</m> modifiers both override the C<$*> setting. That is, no matter -what C<$*> contains, C</s> without C</m> will force "^" to match only at the -beginning of the string and "$" to match only at the end (or just before a -newline at the end) of the string. Together, as /ms, they let the "." match -any character whatsoever, while yet allowing "^" and "$" to match, -respectively, just after and just before newlines within the string. +The C</s> and C</m> modifiers both override the C<$*> setting. That +is, no matter what C<$*> contains, C</s> without C</m> will force +"^" to match only at the beginning of the string and "$" to match +only at the end (or just before a newline at the end) of the string. +Together, as /ms, they let the "." match any character whatsoever, +while yet allowing "^" and "$" to match, respectively, just after +and just before newlines within the string. =item x @@ -49,9 +50,9 @@ Extend your pattern's legibility by permitting whitespace and comments. =back These are usually written as "the C</x> modifier", even though the delimiter -in question might not actually be a slash. In fact, any of these +in question might not really be a slash. Any of these modifiers may also be embedded within the regular expression itself using -the new C<(?...)> construct. See below. +the C<(?...)> construct. See below. The C</x> modifier itself needs a little more explanation. 
It tells the regular expression parser to ignore whitespace that is neither @@ -59,7 +60,7 @@ backslashed nor within a character class. You can use this to break up your regular expression into (slightly) more readable parts. The C<#> character is also treated as a metacharacter introducing a comment, just as in ordinary Perl code. This also means that if you want real -whitespace or C<#> characters in the pattern (outside of a character +whitespace or C<#> characters in the pattern (outside a character class, where they are unaffected by C</x>), that you'll either have to escape them or encode them using octal or hex escapes. Taken together, these features go a long way towards making Perl's regular expressions @@ -70,11 +71,11 @@ in L<perlop>. =head2 Regular Expressions -The patterns used in pattern matching are regular expressions such as -those supplied in the Version 8 regex routines. (In fact, the -routines are derived (distantly) from Henry Spencer's freely -redistributable reimplementation of the V8 routines.) -See L<Version 8 Regular Expressions> for details. +The patterns used in Perl pattern matching derive from supplied in +the Version 8 regex routines. (The routines are derived +(distantly) from Henry Spencer's freely redistributable reimplementation +of the V8 routines.) See L<Version 8 Regular Expressions> for +details. In particular the following metacharacters have their standard I<egrep>-ish meanings: @@ -87,9 +88,9 @@ meanings: () Grouping [] Character class -By default, the "^" character is guaranteed to match at only the -beginning of the string, the "$" character at only the end (or before the -newline at the end) and Perl does certain optimizations with the +By default, the "^" character is guaranteed to match only the +beginning of the string, the "$" character only the end (or before the +newline at the end), and Perl does certain optimizations with the assumption that the string contains only one line. 
Embedded newlines will not be matched by "^" or "$". You may, however, wish to treat a string as a multi-line buffer, such that the "^" will match after any @@ -98,7 +99,7 @@ cost of a little more overhead, you can do this by using the /m modifier on the pattern match operator. (Older programs did this by setting C<$*>, but this practice is now deprecated.) -To facilitate multi-line substitutions, the "." character never matches a +To simplify multi-line substitutions, the "." character never matches a newline unless you use the C</s> modifier, which in effect tells Perl to pretend the string is a single line--even if it isn't. The C</s> modifier also overrides the setting of C<$*>, in case you have some (badly behaved) older @@ -120,7 +121,7 @@ to integral values less than a preset limit defined when perl is built. This is usually 32766 on the most common platforms. The actual limit can be seen in the error message generated by code such as this: - $_ **= $_ , / {$_} / for 2 .. 42; + $_ **= $_ , / {$_} / for 2 .. 42; By default, a quantified subpattern is "greedy", that is, it will match as many times as possible (given a particular starting location) while still @@ -146,7 +147,9 @@ also work: \e escape (think troff) (ESC) \033 octal char (think of a PDP-11) \x1B hex char + \x{263a} wide hex char (Unicode SMILEY) \c[ control char + \N{name} named char \l lowercase next char (think vi) \u uppercase next char (think vi) \L lowercase till \E (think vi) @@ -155,7 +158,8 @@ also work: \Q quote (disable) pattern metacharacters till \E If C<use locale> is in effect, the case map used by C<\l>, C<\L>, C<\u> -and C<\U> is taken from the current locale. See L<perllocale>. +and C<\U> is taken from the current locale. See L<perllocale>. For +documentation of C<\N{name}>, see L<charnames>. You cannot include a literal C<$> or C<@> within a C<\Q> sequence. 
An unescaped C<$> or C<@> interpolates the corresponding variable, @@ -170,13 +174,114 @@ In addition, Perl defines the following: \S Match a non-whitespace character \d Match a digit character \D Match a non-digit character + \pP Match P, named property. Use \p{Prop} for longer names. + \PP Match non-P + \X Match eXtended Unicode "combining character sequence", + equivalent to C<(?:\PM\pM*)> + \C Match a single C char (octet) even under utf8. + +A C<\w> matches a single alphanumeric character, not a whole word. +Use C<\w+> to match a string of Perl-identifier characters (which isn't +the same as matching an English word). If C<use locale> is in effect, the +list of alphabetic characters generated by C<\w> is taken from the +current locale. See L<perllocale>. You may use C<\w>, C<\W>, C<\s>, C<\S>, +C<\d>, and C<\D> within character classes, but if you try to use them +as endpoints of a range, that's not a range, the "-" is understood literally. +See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>. + +The POSIX character class syntax + + [:class:] + +is also available. The available classes and their backslash +equivalents (if available) are as follows: + + alpha + alnum + ascii + cntrl + digit \d + graph + lower + print + punct + space \s + upper + word \w + xdigit + +For example use C<[:upper:]> to match all the uppercase characters. +Note that the C<[]> are part of the C<[::]> construct, not part of the whole +character class. For example: + + [01[:alpha:]%] + +matches one, zero, any alphabetic character, and the percentage sign. + +If the C<utf8> pragma is used, the following equivalences to Unicode +\p{} constructs hold: + + alpha IsAlpha + alnum IsAlnum + ascii IsASCII + cntrl IsCntrl + digit IsDigit + graph IsGraph + lower IsLower + print IsPrint + punct IsPunct + space IsSpace + upper IsUpper + word IsWord + xdigit IsXDigit + +For example C<[:lower:]> and C<\p{IsLower}> are equivalent. 
+ +If the C<utf8> pragma is not used but the C<locale> pragma is, the +classes correlate with the isalpha(3) interface (except for `word', +which is a Perl extension, mirroring C<\w>). + +The assumedly non-obviously named classes are: -A C<\w> matches a single alphanumeric character, not a whole -word. To match a word you'd need to say C<\w+>. If C<use locale> is in -effect, the list of alphabetic characters generated by C<\w> is taken -from the current locale. See L<perllocale>. You may use C<\w>, C<\W>, -C<\s>, C<\S>, C<\d>, and C<\D> within character classes (though not as -either end of a range). +=over 4 + +=item cntrl + +Any control character. Usually characters that don't produce output as +such but instead control the terminal somehow: for example newline and +backspace are control characters. All characters with ord() less than +32 are most often classified as control characters. + +=item graph + +Any alphanumeric or punctuation character. + +=item print + +Any alphanumeric or punctuation character or space. + +=item punct + +Any punctuation character. + +=item xdigit + +Any hexadecimal digit. Though this may feel silly (/0-9a-f/i would +work just fine) it is included for completeness. + +=back + +You can negate the [::] character classes by prefixing the class name +with a '^'. This is a Perl extension. For example: + + POSIX trad. Perl utf8 Perl + + [:^digit:] \D \P{IsDigit} + [:^space:] \S \P{IsSpace} + [:^word:] \W \P{IsWord} + +The POSIX character classes [.cc.] and [=cc=] are recognized but +B<not> supported and trying to use them will cause an error. 
Perl defines the following zero-width assertions: @@ -185,101 +290,163 @@ Perl defines the following zero-width assertions: \A Match only at beginning of string \Z Match only at end of string, or before newline at the end \z Match only at end of string - \G Match only where previous m//g left off (works only with /g) - -A word boundary (C<\b>) is defined as a spot between two characters that -has a C<\w> on one side of it and a C<\W> on the other side of it (in -either order), counting the imaginary characters off the beginning and -end of the string as matching a C<\W>. (Within character classes C<\b> -represents backspace rather than a word boundary.) The C<\A> and C<\Z> are -just like "^" and "$", except that they won't match multiple times when the -C</m> modifier is used, while "^" and "$" will match at every internal line -boundary. To match the actual end of the string, not ignoring newline, -you can use C<\z>. The C<\G> assertion can be used to chain global -matches (using C<m//g>), as described in -L<perlop/"Regexp Quote-Like Operators">. - -It is also useful when writing C<lex>-like scanners, when you have several -patterns that you want to match against consequent substrings of your -string, see the previous reference. -The actual location where C<\G> will match can also be influenced -by using C<pos()> as an lvalue. See L<perlfunc/pos>. - -When the bracketing construct C<( ... )> is used, \E<lt>digitE<gt> matches the -digit'th substring. Outside of the pattern, always use "$" instead of "\" -in front of the digit. (While the \E<lt>digitE<gt> notation can on rare occasion work -outside the current pattern, this should not be relied upon. See the -WARNING below.) The scope of $E<lt>digitE<gt> (and C<$`>, C<$&>, and C<$'>) -extends to the end of the enclosing BLOCK or eval string, or to the next -successful pattern match, whichever comes first. 
If you want to use -parentheses to delimit a subpattern (e.g., a set of alternatives) without -saving it as a subpattern, follow the ( with a ?:. - -You may have as many parentheses as you wish. If you have more -than 9 substrings, the variables $10, $11, ... refer to the -corresponding substring. Within the pattern, \10, \11, etc. refer back -to substrings if there have been at least that many left parentheses before -the backreference. Otherwise (for backward compatibility) \10 is the -same as \010, a backspace, and \11 the same as \011, a tab. And so -on. (\1 through \9 are always backreferences.) - -C<$+> returns whatever the last bracket match matched. C<$&> returns the -entire matched string. (C<$0> used to return the same thing, but not any -more.) C<$`> returns everything before the matched string. C<$'> returns -everything after the matched string. Examples: + \G Match only at pos() (e.g. at the end-of-match position + of prior m//g) + +A word boundary (C<\b>) is a spot between two characters +that has a C<\w> on one side of it and a C<\W> on the other side +of it (in either order), counting the imaginary characters off the +beginning and end of the string as matching a C<\W>. (Within +character classes C<\b> represents backspace rather than a word +boundary, just as it normally does in any double-quoted string.) +The C<\A> and C<\Z> are just like "^" and "$", except that they +won't match multiple times when the C</m> modifier is used, while +"^" and "$" will match at every internal line boundary. To match +the actual end of the string and not ignore an optional trailing +newline, use C<\z>. + +The C<\G> assertion can be used to chain global matches (using +C<m//g>), as described in L<perlop/"Regexp Quote-Like Operators">. +It is also useful when writing C<lex>-like scanners, when you have +several patterns that you want to match against consequent substrings +of your string, see the previous reference. 
The actual location +where C<\G> will match can also be influenced by using C<pos()> as +an lvalue. See L<perlfunc/pos>. + +The bracketing construct C<( ... )> creates capture buffers. To +refer to the digit'th buffer use \<digit> within the +match. Outside the match use "$" instead of "\". (The +\<digit> notation works in certain circumstances outside +the match. See the warning below about \1 vs $1 for details.) +Referring back to another part of the match is called a +I<backreference>. + +There is no limit to the number of captured substrings that you may +use. However Perl also uses \10, \11, etc. as aliases for \010, +\011, etc. (Recall that 0 means octal, so \011 is the 9'th ASCII +character, a tab.) Perl resolves this ambiguity by interpreting +\10 as a backreference only if at least 10 left parentheses have +opened before it. Likewise \11 is a backreference only if at least +11 left parentheses have opened before it. And so on. \1 through +\9 are always interpreted as backreferences. + +Examples: s/^([^ ]*) *([^ ]*)/$2 $1/; # swap first two words - if (/Time: (..):(..):(..)/) { + if (/(.)\1/) { # find first doubled char + print "'$1' is the first doubled character\n"; + } + + if (/Time: (..):(..):(..)/) { # parse out values $hours = $1; $minutes = $2; $seconds = $3; } -Once perl sees that you need one of C<$&>, C<$`> or C<$'> anywhere in -the program, it has to provide them on each and every pattern match. -This can slow your program down. The same mechanism that handles -these provides for the use of $1, $2, etc., so you pay the same price -for each pattern that contains capturing parentheses. But if you never -use $&, etc., in your script, then patterns I<without> capturing -parentheses won't be penalized. So avoid $&, $', and $` if you can, -but if you can't (and some algorithms really appreciate them), once -you've used them once, use them at will, because you've already paid -the price. As of 5.005, $& is not so costly as the other two.
- -Backslashed metacharacters in Perl are -alphanumeric, such as C<\b>, C<\w>, C<\n>. Unlike some other regular -expression languages, there are no backslashed symbols that aren't -alphanumeric. So anything that looks like \\, \(, \), \E<lt>, \E<gt>, -\{, or \} is always interpreted as a literal character, not a -metacharacter. This was once used in a common idiom to disable or -quote the special meanings of regular expression metacharacters in a -string that you want to use for a pattern. Simply quote all -non-alphanumeric characters: +Several special variables also refer back to portions of the previous +match. C<$+> returns whatever the last bracket match matched. +C<$&> returns the entire matched string. (At one point C<$0> did +also, but now it returns the name of the program.) C<$`> returns +everything before the matched string. And C<$'> returns everything +after the matched string. + +The numbered variables ($1, $2, $3, etc.) and the related punctuation +set (C<$+>, C<$&>, C<$`>, and C<$'>) are all dynamically scoped +until the end of the enclosing block or until the next successful +match, whichever comes first. (See L<perlsyn/"Compound Statements">.) + +B<WARNING>: Once Perl sees that you need one of C<$&>, C<$`>, or +C<$'> anywhere in the program, it has to provide them for every +pattern match. This may substantially slow your program. Perl +uses the same mechanism to produce $1, $2, etc, so you also pay a +price for each pattern that contains capturing parentheses. (To +avoid this cost while retaining the grouping behaviour, use the +extended regular expression C<(?: ... )> instead.) But if you never +use C<$&>, C<$`> or C<$'>, then patterns I<without> capturing +parentheses will not be penalized. So avoid C<$&>, C<$'>, and C<$`> +if you can, but if you can't (and some algorithms really appreciate +them), once you've used them once, use them at will, because you've +already paid the price. As of 5.005, C<$&> is not so costly as the +other two.
+ +Backslashed metacharacters in Perl are alphanumeric, such as C<\b>, +C<\w>, C<\n>. Unlike some other regular expression languages, there +are no backslashed symbols that aren't alphanumeric. So anything +that looks like \\, \(, \), \<, \>, \{, or \} is always +interpreted as a literal character, not a metacharacter. This was +once used in a common idiom to disable or quote the special meanings +of regular expression metacharacters in a string that you want to +use for a pattern. Simply quote all non-alphanumeric characters: $pattern =~ s/(\W)/\\$1/g; -Now it is much more common to see either the quotemeta() function or -the C<\Q> escape sequence used to disable all metacharacters' special +Today it is more common to use the quotemeta() function or the C<\Q> +metaquoting escape sequence to disable all metacharacters' special meanings like this: /$unquoted\Q$quoted\E$unquoted/ -Perl defines a consistent extension syntax for regular expressions. -The syntax is a pair of parentheses with a question mark as the first -thing within the parentheses (this was a syntax error in older -versions of Perl). The character after the question mark gives the -function of the extension. Several extensions are already supported: +Beware that if you put literal backslashes (those not inside +interpolated variables) between C<\Q> and C<\E>, double-quotish +backslash interpolation may lead to confusing results. If you +I<need> to use literal backslashes within C<\Q...\E>, +consult L<perlop/"Gory details of parsing quoted constructs">. + +=head2 Extended Patterns + +Perl also defines a consistent extension syntax for features not +found in standard tools like B<awk> and B<lex>. The syntax is a +pair of parentheses with a question mark as the first thing within +the parentheses. The character after the question mark indicates +the extension. + +The stability of these extensions varies widely. Some have been +part of the core language for many years. 
Others are experimental +and may change without warning or be completely removed. Check +the documentation on an individual feature to verify its current +status. + +A question mark was chosen for this and for the minimal-matching +construct because 1) question marks are rare in older regular +expressions, and 2) whenever you see one, you should stop and +"question" exactly what is going on. That's psychology... =over 10 =item C<(?#text)> -A comment. The text is ignored. If the C</x> switch is used to enable -whitespace formatting, a simple C<#> will suffice. Note that perl closes +A comment. The text is ignored. If the C</x> modifier enables +whitespace formatting, a simple C<#> will suffice. Note that Perl closes the comment as soon as it sees a C<)>, so there is no way to put a literal C<)> in the comment. +=item C<(?imsx-imsx)> + +One or more embedded pattern-match modifiers. This is particularly +useful for dynamic patterns, such as those read in from a configuration +file, taken from an argument, or specified in a table somewhere, +etc. Consider the case where some patterns want to be case sensitive +and some do not. The case insensitive ones need to include merely +C<(?i)> at the front of the pattern. For example: + + $pattern = "foobar"; + if ( /$pattern/i ) { } + + # more flexible: + + $pattern = "(?i)foobar"; + if ( /$pattern/ ) { } + +Letters after a C<-> turn those modifiers off. These modifiers are +localized inside an enclosing group (if any). For example, + + ( (?i) blah ) \s+ \1 + +will match a repeated (I<including the case>!) word C<blah> in any +case, assuming C<x> modifier, and no C<i> modifier outside this +group. + =item C<(?:pattern)> =item C<(?imsx-imsx:pattern)> @@ -293,28 +460,29 @@ is like @fields = split(/\b(a|b|c)\b/) -but doesn't spit out extra fields. +but doesn't spit out extra fields. It's also cheaper not to capture +characters if you don't need to. -The letters between C<?> and C<:> act as flags modifiers, see -L<C<(?imsx-imsx)>>.
In particular, +Any letters between C<?> and C<:> act as flags modifiers as with +C<(?imsx-imsx)>. For example, /(?s-i:more.*than).*million/i -is equivalent to more verbose +is equivalent to the more verbose /(?:(?s-i)more.*than).*million/i =item C<(?=pattern)> -A zero-width positive lookahead assertion. For example, C</\w+(?=\t)/> +A zero-width positive look-ahead assertion. For example, C</\w+(?=\t)/> matches a word followed by a tab, without including the tab in C<$&>. =item C<(?!pattern)> -A zero-width negative lookahead assertion. For example C</foo(?!bar)/> +A zero-width negative look-ahead assertion. For example C</foo(?!bar)/> matches any occurrence of "foo" that isn't followed by "bar". Note -however that lookahead and lookbehind are NOT the same thing. You cannot -use this for lookbehind. +however that look-ahead and look-behind are NOT the same thing. You cannot +use this for look-behind. If you are looking for a "bar" that isn't preceded by a "foo", C</(?!foo)bar/> will not do what you want. That's because the C<(?!foo)> is just saying that @@ -326,29 +494,32 @@ Sometimes it's still easier just to say: if (/bar/ && $` !~ /foo$/) -For lookbehind see below. +For look-behind see below. -=item C<(?E<lt>=pattern)> +=item C<(?<=pattern)> -A zero-width positive lookbehind assertion. For example, C</(?E<lt>=\t)\w+/> -matches a word following a tab, without including the tab in C<$&>. -Works only for fixed-width lookbehind. +A zero-width positive look-behind assertion. For example, C</(?<=\t)\w+/> +matches a word that follows a tab, without including the tab in C<$&>. +Works only for fixed-width look-behind. =item C<(?<!pattern)> -A zero-width negative lookbehind assertion. For example C</(?<!bar)foo/> -matches any occurrence of "foo" that isn't following "bar". -Works only for fixed-width lookbehind. +A zero-width negative look-behind assertion. For example C</(?<!bar)foo/> +matches any occurrence of "foo" that does not follow "bar". 
Works +only for fixed-width look-behind. =item C<(?{ code })> -Experimental "evaluate any Perl code" zero-width assertion. Always -succeeds. C<code> is not interpolated. Currently the rules to -determine where the C<code> ends are somewhat convoluted. +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. + +This zero-width assertion evaluates any embedded Perl code. It +always succeeds, and its C<code> is not interpolated. Currently, +the rules to determine where the C<code> ends are somewhat convoluted. -The C<code> is properly scoped in the following sense: if the assertion -is backtracked (compare L<"Backtracking">), all the changes introduced after -C<local>isation are undone, so +The C<code> is properly scoped in the following sense: If the assertion +is backtracked (compare L<"Backtracking">), all changes introduced after +C<local>ization are undone, so that $_ = 'a' x 8; m< @@ -364,117 +535,173 @@ C<local>isation are undone, so # location. >x; -will set C<$res = 4>. Note that after the match $cnt returns to the globally -introduced value 0, since the scopes which restrict C<local> statements +will set C<$res = 4>. Note that after the match, $cnt returns to the globally +introduced value, because the scopes that restrict C<local> operators are unwound. -This assertion may be used as L<C<(?(condition)yes-pattern|no-pattern)>> -switch. If I<not> used in this way, the result of evaluation of C<code> -is put into variable $^R. This happens immediately, so $^R can be used from -other C<(?{ code })> assertions inside the same regular expression. +This assertion may be used as a C<(?(condition)yes-pattern|no-pattern)> +switch. If I<not> used in this way, the result of evaluation of +C<code> is put into the special variable C<$^R>. This happens +immediately, so C<$^R> can be used from other C<(?{ code })> assertions +inside the same regular expression.
-The above assignment to $^R is properly localized, thus the old value of $^R -is restored if the assertion is backtracked (compare L<"Backtracking">). +The assignment to C<$^R> above is properly localized, so the old +value of C<$^R> is restored if the assertion is backtracked; compare +L<"Backtracking">. -Due to security concerns, this construction is not allowed if the regular -expression involves run-time interpolation of variables, unless -C<use re 'eval'> pragma is used (see L<re>), or the variables contain -results of qr() operator (see L<perlop/"qr/STRING/imosx">). +For reasons of security, this construct is forbidden if the regular +expression involves run-time interpolation of variables, unless the +perilous C<use re 'eval'> pragma has been used (see L<re>), or the +variables contain results of C<qr//> operator (see +L<perlop/"qr/STRING/imosx">). -This restriction is due to the wide-spread (questionable) practice of -using the construct +This restriction is because of the wide-spread and remarkably convenient +custom of using run-time determined strings as patterns. For example: $re = <>; chomp $re; $string =~ /$re/; -without tainting. While this code is frowned upon from security point -of view, when C<(?{})> was introduced, it was considered bad to add -I<new> security holes to existing scripts. - -B<NOTE:> Use of the above insecure snippet without also enabling taint mode -is to be severely frowned upon. C<use re 'eval'> does not disable tainting -checks, thus to allow $re in the above snippet to contain C<(?{})> -I<with tainting enabled>, one needs both C<use re 'eval'> and untaint -the $re. - -=item C<(?E<gt>pattern)> - -An "independent" subexpression. Matches the substring that a -I<standalone> C<pattern> would match if anchored at the given position, -B<and only this substring>. 
- -Say, C<^(?E<gt>a*)ab> will never match, since C<(?E<gt>a*)> (anchored -at the beginning of string, as above) will match I<all> characters -C<a> at the beginning of string, leaving no C<a> for C<ab> to match. -In contrast, C<a*ab> will match the same as C<a+b>, since the match of -the subgroup C<a*> is influenced by the following group C<ab> (see -L<"Backtracking">). In particular, C<a*> inside C<a*ab> will match -fewer characters than a standalone C<a*>, since this makes the tail match. - -An effect similar to C<(?E<gt>pattern)> may be achieved by - - (?=(pattern))\1 - -since the lookahead is in I<"logical"> context, thus matches the same -substring as a standalone C<a+>. The following C<\1> eats the matched -string, thus making a zero-length assertion into an analogue of -C<(?E<gt>...)>. (The difference between these two constructs is that the -second one uses a catching group, thus shifting ordinals of -backreferences in the rest of a regular expression.) - -This construct is useful for optimizations of "eternal" -matches, because it will not backtrack (see L<"Backtracking">). +Before Perl knew how to execute interpolated code within a pattern, +this operation was completely safe from a security point of view, +although it could raise an exception from an illegal pattern. If +you turn on the C<use re 'eval'>, though, it is no longer secure, +so you should only do so if you are also using taint checking. +Better yet, use the carefully constrained evaluation within a Safe +module. See L<perlsec> for details about both these mechanisms. + +=item C<(??{ code })> + +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. +A simplified version of the syntax may be introduced for commonly +used idioms. + +This is a "postponed" regular subexpression. The C<code> is evaluated +at run time, at the moment this subexpression may match. 
The result +of evaluation is considered as a regular expression and matched as +if it were inserted instead of this construct. + +The C<code> is not interpolated. As before, the rules to determine +where the C<code> ends are currently somewhat convoluted. + +The following pattern matches a parenthesized group: + + $re = qr{ + \( + (?: + (?> [^()]+ ) # Non-parens without backtracking + | + (??{ $re }) # Group with matching parens + )* + \) + }x; + +=item C<< (?>pattern) >> + +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. + +An "independent" subexpression, one which matches the substring +that a I<standalone> C<pattern> would match if anchored at the given +position, and it matches I<nothing other than this substring>. This +construct is useful for optimizations of what would otherwise be +"eternal" matches, because it will not backtrack (see L<"Backtracking">). +It may also be useful in places where the "grab all you can, and do not +give anything back" semantic is desirable. + +For example: C<< ^(?>a*)ab >> will never match, since C<< (?>a*) >> +(anchored at the beginning of string, as above) will match I<all> +characters C<a> at the beginning of string, leaving no C<a> for +C<ab> to match. In contrast, C<a*ab> will match the same as C<a+b>, +since the match of the subgroup C<a*> is influenced by the following +group C<ab> (see L<"Backtracking">). In particular, C<a*> inside +C<a*ab> will match fewer characters than a standalone C<a*>, since +this makes the tail match. + +An effect similar to C<< (?>pattern) >> may be achieved by writing +C<(?=(pattern))\1>. This matches the same substring as a standalone +C<a+>, and the following C<\1> eats the matched string; it therefore +makes a zero-length assertion into an analogue of C<< (?>...) >>. 
+(The difference between these two constructs is that the second one +uses a capturing group, thus shifting ordinals of backreferences +in the rest of a regular expression.) + +Consider this pattern: m{ \( ( - [^()]+ + [^()]+ # x+ | \( [^()]* \) )+ \) }x -That will efficiently match a nonempty group with matching -two-or-less-level-deep parentheses. However, if there is no such group, -it will take virtually forever on a long string. That's because there are -so many different ways to split a long string into several substrings. -This is what C<(.+)+> is doing, and C<(.+)+> is similar to a subpattern -of the above pattern. Consider that the above pattern detects no-match -on C<((()aaaaaaaaaaaaaaaaaa> in several seconds, but that each extra -letter doubles this time. This exponential performance will make it -appear that your program has hung. - -However, a tiny modification of this pattern +That will efficiently match a nonempty group with matching parentheses +two levels deep or less. However, if there is no such group, it +will take virtually forever on a long string. That's because there +are so many different ways to split a long string into several +substrings. This is what C<(.+)+> is doing, and C<(.+)+> is similar +to a subpattern of the above pattern. Consider how the pattern +above detects no-match on C<((()aaaaaaaaaaaaaaaaaa> in several +seconds, but that each extra letter doubles this time. This +exponential performance will make it appear that your program has +hung. However, a tiny change to this pattern m{ \( ( - (?> [^()]+ ) + (?> [^()]+ ) # change x+ above to (?> x+ ) | \( [^()]* \) )+ \) }x -which uses C<(?E<gt>...)> matches exactly when the one above does (verifying +which uses C<< (?>...) >> matches exactly when the one above does (verifying this yourself would be a productive exercise), but finishes in a fourth the time when used on a similar string with 1000000 C<a>s. 
Be aware, however, that this pattern currently triggers a warning message under -B<-w> saying it C<"matches the null string many times">): +the C<use warnings> pragma or B<-w> switch saying it +C<"matches the null string many times">. -On simple groups, such as the pattern C<(?E<gt> [^()]+ )>, a comparable -effect may be achieved by negative lookahead, as in C<[^()]+ (?! [^()] )>. +On simple groups, such as the pattern C<< (?> [^()]+ ) >>, a comparable +effect may be achieved by negative look-ahead, as in C<[^()]+ (?! [^()] )>. This was only 4 times slower on a string with 1000000 C<a>s. +The "grab all you can, and do not give anything back" semantic is desirable +in many situations where at first sight a simple C<()*> looks like +the correct solution. Suppose we parse text with comments being delimited +by C<#> followed by some optional (horizontal) whitespace. Contrary to +its appearance, C<#[ \t]*> I<is not> the correct subexpression to match +the comment delimiter, because it may "give up" some whitespace if +the remainder of the pattern can be made to match that way. The correct +answer is either one of these: + + (?>#[ \t]*) + #[ \t]*(?![ \t]) + +For example, to grab non-empty comments into $1, one should use either +one of these: + + / (?> \# [ \t]* ) ( .+ ) /x; + / \# [ \t]* ( [^ \t] .* ) /x; + +Which one you pick depends on which of these expressions better reflects +the above specification of comments. + =item C<(?(condition)yes-pattern|no-pattern)> =item C<(?(condition)yes-pattern)> +B<WARNING>: This extended regular expression feature is considered +highly experimental, and may be changed or deleted without notice. + Conditional expression. C<(condition)> should be either an integer in parentheses (which is valid if the corresponding pair of parentheses -matched), or lookahead/lookbehind/evaluate zero-width assertion. +matched), or look-ahead/look-behind/evaluate zero-width assertion. -Say, +For example: m{ ( \( )?
[^()]+ @@ -484,45 +711,20 @@ Say, matches a chunk of non-parentheses, possibly included in parentheses themselves. -=item C<(?imsx-imsx)> - -One or more embedded pattern-match modifiers. This is particularly -useful for patterns that are specified in a table somewhere, some of -which want to be case sensitive, and some of which don't. The case -insensitive ones need to include merely C<(?i)> at the front of the -pattern. For example: - - $pattern = "foobar"; - if ( /$pattern/i ) { } - - # more flexible: - - $pattern = "(?i)foobar"; - if ( /$pattern/ ) { } - -Letters after C<-> switch modifiers off. - -These modifiers are localized inside an enclosing group (if any). Say, - - ( (?i) blah ) \s+ \1 - -(assuming C<x> modifier, and no C<i> modifier outside of this group) -will match a repeated (I<including the case>!) word C<blah> in any -case. - =back -A question mark was chosen for this and for the new minimal-matching -construct because 1) question mark is pretty rare in older regular -expressions, and 2) whenever you see one, you should stop and "question" -exactly what is going on. That's psychology... - =head2 Backtracking +NOTE: This section presents an abstract approximation of regular +expression behavior. For a more rigorous (and complicated) view of +the rules involved in selecting a match among possible alternatives, +see L<Combining pieces together>. + A fundamental feature of regular expression matching involves the notion called I<backtracking>, which is currently used (when needed) by all regular expression quantifiers, namely C<*>, C<*?>, C<+>, -C<+?>, C<{n,m}>, and C<{n,m}?>. +C<+?>, C<{n,m}>, and C<{n,m}?>. Backtracking is often optimized +internally, but the general principle outlined here is valid. For a regular expression to match, the I<entire> regular expression must match, not just part of it. 
So if the beginning of a pattern containing a @@ -561,7 +763,7 @@ Which perhaps unexpectedly yields: got <d is under the bar in the > That's because C<.*> was greedy, so you get everything between the -I<first> "foo" and the I<last> "bar". In this case, it's more effective +I<first> "foo" and the I<last> "bar". Here it's more effective to use minimal matching to make sure you get the text between a "foo" and the first "bar" thereafter. @@ -624,7 +826,7 @@ definition might succeed against a particular string. And if there are multiple ways it might succeed, you need to understand backtracking to know which variety of success you will achieve. -When using lookahead assertions and negations, this can all get even +When using look-ahead assertions and negations, this can all get even tricker. Imagine you'd like to find a sequence of non-digits not followed by "123". You might try to write that as @@ -660,8 +862,9 @@ that you've asked "Is it true that at the start of $x, following 0 or more non-digits, you have something that's not 123?" If the pattern matcher had let C<\D*> expand to "ABC", this would have caused the whole pattern to fail. + The search engine will initially match C<\D*> with "ABC". Then it will -try to match C<(?!123> with "123", which of course fails. But because +try to match C<(?!123)> with "123", which fails. But because a quantifier (C<\D*>) has been used in the regular expression, the search engine can backtrack and retry the match differently in the hope of matching the complete regular expression. @@ -669,13 +872,13 @@ in the hope of matching the complete regular expression. The pattern really, I<really> wants to succeed, so it uses the standard pattern back-off-and-retry and lets C<\D*> expand to just "AB" this time. Now there's indeed something following "AB" that is not -"123". It's in fact "C123", which suffices. +"123". It's "C123", which suffices. -We can deal with this by using both an assertion and a negation.
We'll -say that the first part in $1 must be followed by a digit, and in fact, it -must also be followed by something that's not "123". Remember that the -lookaheads are zero-width expressions--they only look, but don't consume -any of the string in their match. So rewriting this way produces what +We can deal with this by using both an assertion and a negation. +We'll say that the first part in $1 must be followed both by a digit +and by something that's not "123". Remember that the look-aheads +are zero-width expressions--they only look, but don't consume any +of the string in their match. So rewriting this way produces what you'd expect; that is, case 5 will fail, but case 6 succeeds: print "5: got $1\n" if $x =~ /^(\D*)(?=\d)(?!123)/ ; @@ -684,7 +887,7 @@ you'd expect; that is, case 5 will fail, but case 6 succeeds: 6: got ABC In other words, the two zero-width assertions next to each other work as though -they're ANDed together, just as you'd use any builtin assertions: C</^$/> +they're ANDed together, just as you'd use any built-in assertions: C</^$/> matches only if you're at the beginning of the line AND the end of the line simultaneously. The deeper underlying truth is that juxtaposition in regular expressions always means AND, except when you write an explicit OR @@ -692,23 +895,25 @@ using the vertical bar. C</ab/> means match "a" AND (then) match "b", although the attempted matches are made at different positions because "a" is not a zero-width assertion, but a one-width assertion. -One warning: particularly complicated regular expressions can take -exponential time to solve due to the immense number of possible ways they -can use backtracking to try match. For example this will take a very long -time to run +B<WARNING>: particularly complicated regular expressions can take +exponential time to solve because of the immense number of possible +ways they can use backtracking to try for a match.
For example, without +internal optimizations done by the regular expression engine, this will +take a painfully long time to run: - /((a{0,5}){0,5}){0,5}/ + 'aaaaaaaaaaaa' =~ /((a{0,5}){0,5}){0,5}[c]/ -And if you used C<*>'s instead of limiting it to 0 through 5 matches, then -it would take literally forever--or until you ran out of stack space. +And if you used C<*>'s instead of limiting it to 0 through 5 matches, +then it would take forever--or until you ran out of stack space. -A powerful tool for optimizing such beasts is "independent" groups, -which do not backtrace (see L<C<(?E<gt>pattern)>>). Note also that -zero-length lookahead/lookbehind assertions will not backtrace to make -the tail match, since they are in "logical" context: only the fact -whether they match or not is considered relevant. For an example -where side-effects of a lookahead I<might> have influenced the -following match, see L<C<(?E<gt>pattern)>>. +A powerful tool for optimizing such beasts is what is known as an +"independent group", +which does not backtrack (see L<C<< (?>pattern) >>>). Note also that +zero-length look-ahead/look-behind assertions will not backtrack to make +the tail match, since they are in "logical" context: only +whether they match is considered relevant. For an example +where side-effects of look-ahead I<might> have influenced the +following match, see L<C<< (?>pattern) >>>. =head2 Version 8 Regular Expressions @@ -726,13 +931,18 @@ would match "blurfl" in the target string. You can specify a character class, by enclosing a list of characters in C<[]>, which will match any one character from the list. If the first character after the "[" is "^", the class matches any character not -in the list. Within a list, the "-" character is used to specify a +in the list. Within a list, the "-" character specifies a range, so that C<a-z> represents all characters between "a" and "z", -inclusive. 
If you want "-" itself to be a member of a class, put it -at the start or end of the list, or escape it with a backslash. (The +inclusive. If you want either "-" or "]" itself to be a member of a +class, put it at the start of the list (possibly after a "^"), or +escape it with a backslash. "-" is also taken literally when it is +at the end of the list, just before the closing "]". (The following all specify the same class of three characters: C<[-az]>, C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which specifies a class containing twenty-six characters.) +Also, if you try to use the character classes C<\w>, C<\W>, C<\s>, +C<\S>, C<\d>, or C<\D> as endpoints of a range, that's not a range, +the "-" is understood literally. Note also that the whole range idea is rather unportable between character sets--and even within character sets they may cause results @@ -756,8 +966,8 @@ or "foe" in the target string (as would C<f(e|i|o)e>). The first alternative includes everything from the last pattern delimiter ("(", "[", or the beginning of the pattern) up to the first "|", and the last alternative contains everything from the last "|" to the next -pattern delimiter. For this reason, it's common practice to include -alternatives in parentheses, to minimize confusion about where they +pattern delimiter. That's why it's common practice to include +alternatives in parentheses: to minimize confusion about where they start and end. Alternatives are tried from left to right, so the first @@ -771,18 +981,18 @@ important when you are capturing matched text using parentheses.) Also remember that "|" is interpreted as a literal within square brackets, so if you write C<[fee|fie|foe]> you're really only matching C<[feio|]>. -Within a pattern, you may designate subpatterns for later reference by -enclosing them in parentheses, and you may refer back to the I<n>th -subpattern later in the pattern using the metacharacter \I<n>. 
-Subpatterns are numbered based on the left to right order of their -opening parenthesis. A backreference matches whatever -actually matched the subpattern in the string being examined, not the -rules for that subpattern. Therefore, C<(0|0x)\d*\s\1\d*> will -match "0x1234 0x4321", but not "0x1234 01234", because subpattern 1 -actually matched "0x", even though the rule C<0|0x> could -potentially match the leading 0 in the second number. +Within a pattern, you may designate subpatterns for later reference +by enclosing them in parentheses, and you may refer back to the +I<n>th subpattern later in the pattern using the metacharacter +\I<n>. Subpatterns are numbered based on the left to right order +of their opening parenthesis. A backreference matches whatever +actually matched the subpattern in the string being examined, not +the rules for that subpattern. Therefore, C<(0|0x)\d*\s\1\d*> will +match "0x1234 0x4321", but not "0x1234 01234", because subpattern +1 matched "0x", even though the rule C<0|0x> could potentially match +the leading 0 in the second number. -=head2 WARNING on \1 vs $1 +=head2 Warning on \1 vs $1 Some people get too used to writing things like: @@ -803,13 +1013,13 @@ Or if you try to do s/(\d+)/\1000/; You can't disambiguate that by saying C<\{1}000>, whereas you can fix it with -C<${1}000>. Basically, the operation of interpolation should not be confused +C<${1}000>. The operation of interpolation should not be confused with the operation of matching a backreference. Certainly they mean two different things on the I<left> side of the C<s///>. =head2 Repeated patterns matching zero-length substring -WARNING: Difficult material (and prose) ahead. This section needs a rewrite. +B<WARNING>: Difficult material (and prose) ahead. This section needs a rewrite. Regular expressions provide a terse and powerful programming language. 
As with most other power tools, power comes together with the ability @@ -822,7 +1032,7 @@ loops using regular expressions, with something as innocuous as: The C<o?> can match at the beginning of C<'foo'>, and since the position in the string is not moved by the match, C<o?> would match again and again -due to the C<*> modifier. Another common way to create a similar cycle +because of the C<*> modifier. Another common way to create a similar cycle is with the looping modifier C<//g>: @matches = ( 'foo' =~ m{ o? }xg ); @@ -834,19 +1044,20 @@ or or the loop implied by split(). However, long experience has shown that many programming tasks may -be significantly simplified by using repeated subexpressions which -may match zero-length substrings, with a simple example being: +be significantly simplified by using repeated subexpressions that +may match zero-length substrings. Here's a simple example: @chars = split //, $string; # // is not magic in split ($whitewashed = $string) =~ s/()/ /g; # parens avoid magic s// / -Thus Perl allows the C</()/> construct, which I<forcefully breaks +Thus Perl allows such constructs, by I<forcefully breaking the infinite loop>. The rules for this are different for lower-level loops given by the greedy modifiers C<*+{}>, and for higher-level ones like the C</g> modifier or split() operator. -The lower-level loops are I<interrupted> when it is detected that a -repeated expression did match a zero-length substring, thus +The lower-level loops are I<interrupted> (that is, the loop is +broken) when Perl detects that a repeated expression matched a +zero-length substring. Thus m{ (?: NON_ZERO_LENGTH | ZERO_LENGTH )* }x; @@ -864,7 +1075,7 @@ This prohibition interacts with backtracking (see L<"Backtracking">), and so the I<second best> match is chosen if the I<best> match is of zero length. -Say, +For example: $_ = 'bar'; s/\w??/<$&>/g; @@ -877,8 +1088,111 @@ alternate with one-character-long matches.
Similarly, for repeated C<m/()/g> the second-best match is the match at the position one notch further in the string. -The additional state of being I<matched with zero-length> is associated to +The additional state of being I<matched with zero-length> is associated with the matched string, and is reset by each assignment to pos(). +Zero-length matches at the end of the previous match are ignored +during C<split>. + +=head2 Combining pieces together + +Each of the elementary pieces of regular expressions which were described +before (such as C<ab> or C<\Z>) could match at most one substring +at the given position of the input string. However, in a typical regular +expression these elementary pieces are combined into more complicated +patterns using combining operators C<ST>, C<S|T>, C<S*> etc +(in these examples C<S> and C<T> are regular subexpressions). + +Such combinations can include alternatives, leading to a problem of choice: +if we match a regular expression C<a|ab> against C<"abc">, will it match +substring C<"a"> or C<"ab">? One way to describe which substring is +actually matched is the concept of backtracking (see L<"Backtracking">). +However, this description is too low-level and makes you think +in terms of a particular implementation. + +Another description starts with notions of "better"/"worse". All the +substrings which may be matched by the given regular expression can be +sorted from the "best" match to the "worst" match, and it is the "best" +match which is chosen. This substitutes the question of "what is chosen?" +by the question of "which matches are better, and which are worse?". + +Again, for elementary pieces there is no such question, since at most +one match at a given position is possible. This section describes the +notion of better/worse for combining operators. In the description +below C<S> and C<T> are regular subexpressions. 
+ +=over + +=item C<ST> + +Consider two possible matches, C<AB> and C<A'B'>, C<A> and C<A'> are +substrings which can be matched by C<S>, C<B> and C<B'> are substrings +which can be matched by C<T>. + +If C<A> is better match for C<S> than C<A'>, C<AB> is a better +match than C<A'B'>. + +If C<A> and C<A'> coincide: C<AB> is a better match than C<AB'> if +C<B> is better match for C<T> than C<B'>. + +=item C<S|T> + +When C<S> can match, it is a better match than when only C<T> can match. + +Ordering of two matches for C<S> is the same as for C<S>. Similar for +two matches for C<T>. + +=item C<S{REPEAT_COUNT}> + +Matches as C<SSS...S> (repeated as many times as necessary). + +=item C<S{min,max}> + +Matches as C<S{max}|S{max-1}|...|S{min+1}|S{min}>. + +=item C<S{min,max}?> + +Matches as C<S{min}|S{min+1}|...|S{max-1}|S{max}>. + +=item C<S?>, C<S*>, C<S+> + +Same as C<S{0,1}>, C<S{0,BIG_NUMBER}>, C<S{1,BIG_NUMBER}> respectively. + +=item C<S??>, C<S*?>, C<S+?> + +Same as C<S{0,1}?>, C<S{0,BIG_NUMBER}?>, C<S{1,BIG_NUMBER}?> respectively. + +=item C<< (?>S) >> + +Matches the best match for C<S> and only that. + +=item C<(?=S)>, C<(?<=S)> + +Only the best match for C<S> is considered. (This is important only if +C<S> has capturing parentheses, and backreferences are used somewhere +else in the whole regular expression.) + +=item C<(?!S)>, C<(?<!S)> + +For this grouping operator there is no need to describe the ordering, since +only whether or not C<S> can match is important. + +=item C<(??{ EXPR })> + +The ordering is the same as for the regular expression which is +the result of EXPR. + +=item C<(?(condition)yes-pattern|no-pattern)> + +Recall that which of C<yes-pattern> or C<no-pattern> actually matches is +already determined. The ordering of the matches is the same as for the +chosen subexpression. + +=back + +The above recipes describe the ordering of matches I<at a given position>. 
+One more rule is needed to understand how a match is determined for the +whole regular expression: a match at an earlier position is always better +than a match at a later position. =head2 Creating custom RE engines @@ -927,14 +1241,26 @@ part of this regular expression needs to be converted explicitly $re = customre::convert $re; /\Y|$re\Y|/; -=head2 SEE ALSO +=head1 BUGS + +This document varies from difficult to understand to completely +and utterly opaque. The wandering prose riddled with jargon is +hard to fathom in several places. + +This document needs a rewrite that separates the tutorial content +from the reference content. + +=head1 SEE ALSO L<perlop/"Regexp Quote-Like Operators">. L<perlop/"Gory details of parsing quoted constructs">. +L<perlfaq6>. + L<perlfunc/pos>. L<perllocale>. -I<Mastering Regular Expressions> (see L<perlbook>) by Jeffrey Friedl. +I<Mastering Regular Expressions> by Jeffrey Friedl, published +by O'Reilly and Associates. diff --git a/contrib/perl5/pod/perlref.pod b/contrib/perl5/pod/perlref.pod index 596ff72..2727e95 100644 --- a/contrib/perl5/pod/perlref.pod +++ b/contrib/perl5/pod/perlref.pod @@ -21,7 +21,7 @@ hashes of arrays, arrays of hashes of functions, and so on. Hard references are smart--they keep track of reference counts for you, automatically freeing the thing referred to when its reference count goes -to zero. (Note: the reference counts for values in self-referential or +to zero. (Reference counts for values in self-referential or cyclic data structures may not go to zero without a little help; see L<perlobj/"Two-Phased Garbage Collection"> for a detailed explanation.) If that thing happens to be an object, the object is destructed. See @@ -31,7 +31,7 @@ have been officially "blessed" into a class package.) Symbolic references are names of variables or other objects, just as a symbolic link in a Unix filesystem contains merely the name of a file. -The C<*glob> notation is a kind of symbolic reference. 
(Symbolic +The C<*glob> notation is something of a symbolic reference. (Symbolic references are sometimes called "soft references", but please don't call them that; references are confusing enough without useless synonyms.) @@ -56,8 +56,8 @@ References can be created in several ways. =item 1. By using the backslash operator on a variable, subroutine, or value. -(This works much like the & (address-of) operator in C.) Note -that this typically creates I<ANOTHER> reference to a variable, because +(This works much like the & (address-of) operator in C.) +This typically creates I<another> reference to a variable, because there's already a reference to the variable in the symbol table. But the symbol table reference might go away, and you'll still have the reference that the backslash returned. Here are some examples: @@ -84,10 +84,10 @@ brackets: Here we've created a reference to an anonymous array of three elements whose final element is itself a reference to another anonymous array of three elements. (The multidimensional syntax described later can be used to -access this. For example, after the above, C<$arrayref-E<gt>[2][1]> would have +access this. For example, after the above, C<< $arrayref->[2][1] >> would have the value "b".) -Note that taking a reference to an enumerated list is not the same +Taking a reference to an enumerated list is not the same as using square brackets--instead it's the same as creating a list of references! @@ -136,7 +136,7 @@ On the other hand, if you want the other meaning, you can do this: sub showem { {; @_ } } # ok sub showem { { return @_ } } # ok -Note how the leading C<+{> and C<{;> always serve to disambiguate +The leading C<+{> and C<{;> always serve to disambiguate the expression to mean either the HASH reference, or the BLOCK. =item 4. @@ -146,18 +146,18 @@ C<sub> without a subname: $coderef = sub { print "Boink!\n" }; -Note the presence of the semicolon. 
Except for the fact that the code -inside isn't executed immediately, a C<sub {}> is not so much a +Note the semicolon. Except for the code +inside not being immediately executed, a C<sub {}> is not so much a declaration as it is an operator, like C<do{}> or C<eval{}>. (However, no matter how many times you execute that particular line (unless you're in an -C<eval("...")>), C<$coderef> will still have a reference to the I<SAME> +C<eval("...")>), $coderef will still have a reference to the I<same> anonymous subroutine.) Anonymous subroutines act as closures with respect to my() variables, -that is, variables visible lexically within the current scope. Closure +that is, variables lexically visible within the current scope. Closure is a notion out of the Lisp world that says if you define an anonymous function in a particular lexical context, it pretends to run in that -context even when it's called outside of the context. +context even when it's called outside the context. In human terms, it's a funny way of passing arguments to a subroutine when you define it as well as when you call it. It's useful for setting up @@ -165,11 +165,9 @@ little bits of code to run later, such as callbacks. You can even do object-oriented stuff with it, though Perl already provides a different mechanism to do that--see L<perlobj>. -You can also think of closure as a way to write a subroutine template without -using eval. (In fact, in version 5.000, eval was the I<only> way to get -closures. You may wish to use "require 5.001" if you use closures.) - -Here's a small example of how closures works: +You might also think of closure as a way to write a subroutine +template without using eval(). Here's a small example of how +closures work: sub newprint { my $x = shift; @@ -188,10 +186,10 @@ This prints Howdy, world! Greetings, earthlings! 
-Note particularly that $x continues to refer to the value passed into -newprint() I<despite> the fact that the "my $x" has seemingly gone out of -scope by the time the anonymous subroutine runs. That's what closure -is all about. +Note particularly that $x continues to refer to the value passed +into newprint() I<despite> "my $x" having gone out of scope by the +time the anonymous subroutine runs. That's what a closure is all +about. This applies only to lexical variables, by the way. Dynamic variables continue to work as they have always worked. Closure is not something @@ -200,7 +198,7 @@ that most Perl programmers need trouble themselves about to begin with. =item 5. References are often returned by special subroutines called constructors. -Perl objects are just references to a special kind of object that happens to know +Perl objects are just references to a special type of object that happens to know which package it's associated with. Constructors are just special subroutines that know how to create that association. They do so by starting with an ordinary reference, and it remains an ordinary reference @@ -241,35 +239,37 @@ known as foo). $ioref = *STDIN{IO}; $globref = *foo{GLOB}; -All of these are self-explanatory except for *foo{IO}. It returns the -IO handle, used for file handles (L<perlfunc/open>), sockets -(L<perlfunc/socket> and L<perlfunc/socketpair>), and directory handles -(L<perlfunc/opendir>). For compatibility with previous versions of -Perl, *foo{FILEHANDLE} is a synonym for *foo{IO}. +All of these are self-explanatory except for C<*foo{IO}>. It returns +the IO handle, used for file handles (L<perlfunc/open>), sockets +(L<perlfunc/socket> and L<perlfunc/socketpair>), and directory +handles (L<perlfunc/opendir>). For compatibility with previous +versions of Perl, C<*foo{FILEHANDLE}> is a synonym for C<*foo{IO}>. -*foo{THING} returns undef if that particular THING hasn't been used yet, -except in the case of scalars. 
*foo{SCALAR} returns a reference to an +C<*foo{THING}> returns undef if that particular THING hasn't been used yet, +except in the case of scalars. C<*foo{SCALAR}> returns a reference to an anonymous scalar if $foo hasn't been used yet. This might change in a future release. -*foo{IO} is an alternative to the \*HANDLE mechanism given in +C<*foo{IO}> is an alternative to the C<*HANDLE> mechanism given in L<perldata/"Typeglobs and Filehandles"> for passing filehandles into or out of subroutines, or storing into larger data structures. Its disadvantage is that it won't create a new filehandle for you. -Its advantage is that you have no risk of clobbering more than you want -to with a typeglob assignment, although if you assign to a scalar instead -of a typeglob, you're ok. +Its advantage is that you have less risk of clobbering more than +you want to with a typeglob assignment. (It still conflates file +and directory handles, though.) However, if you assign the incoming +value to a scalar instead of a typeglob as we do in the examples +below, there's no risk of that happening. - splutter(*STDOUT); - splutter(*STDOUT{IO}); + splutter(*STDOUT); # pass the whole glob + splutter(*STDOUT{IO}); # pass both file and dir handles sub splutter { my $fh = shift; print $fh "her um well a hmmm\n"; } - $rec = get_rec(*STDIN); - $rec = get_rec(*STDIN{IO}); + $rec = get_rec(*STDIN); # pass the whole glob + $rec = get_rec(*STDIN{IO}); # pass both file and dir handles sub get_rec { my $fh = shift; @@ -299,9 +299,9 @@ a simple scalar variable containing a reference of the correct type: &$coderef(1,2,3); print $globref "output\n"; -It's important to understand that we are specifically I<NOT> dereferencing +It's important to understand that we are specifically I<not> dereferencing C<$arrayref[0]> or C<$hashref{"KEY"}> there. The dereference of the -scalar variable happens I<BEFORE> it does any key lookups. Anything more +scalar variable happens I<before> it does any key lookups. 
Anything more complicated than a simple scalar variable must use methods 2 or 3 below. However, a "simple scalar" includes an identifier that itself uses method 1 recursively. Therefore, the following prints "howdy". @@ -334,7 +334,7 @@ people often make the mistake of viewing the dereferencing symbols as proper operators, and wonder about their precedence. If they were, though, you could use parentheses instead of braces. That's not the case. Consider the difference below; case 0 is a short-hand version of case 1, -I<NOT> case 2: +I<not> case 2: $$hashref{"KEY"} = "VALUE"; # CASE 0 ${$hashref}{"KEY"} = "VALUE"; # CASE 1 @@ -356,8 +356,8 @@ syntactic sugar, the examples for method 2 may be written: $coderef->(1,2,3); # Subroutine call The left side of the arrow can be any expression returning a reference, -including a previous dereference. Note that C<$array[$x]> is I<NOT> the -same thing as C<$array-E<gt>[$x]> here: +including a previous dereference. Note that C<$array[$x]> is I<not> the +same thing as C<< $array->[$x] >> here: $array[$x]->{"foo"}->[0] = "January"; @@ -365,11 +365,11 @@ This is one of the cases we mentioned earlier in which references could spring into existence when in an lvalue context. Before this statement, C<$array[$x]> may have been undefined. If so, it's automatically defined with a hash reference so that we can look up -C<{"foo"}> in it. Likewise C<$array[$x]-E<gt>{"foo"}> will automatically get +C<{"foo"}> in it. Likewise C<< $array[$x]->{"foo"} >> will automatically get defined with an array reference so that we can look up C<[0]> in it. This process is called I<autovivification>. -One more thing here. The arrow is optional I<BETWEEN> brackets +One more thing here. The arrow is optional I<between> brackets subscripts, so you can shrink the above down to $array[$x]{"foo"}[0] = "January"; @@ -394,14 +394,27 @@ civility though. =back -The ref() operator may be used to determine what type of thing the -reference is pointing to. 
See L<perlfunc>. +Using a string or number as a reference produces a symbolic reference, +as explained above. Using a reference as a number produces an +integer representing its storage location in memory. The only +useful thing to be done with this is to compare two references +numerically to see whether they refer to the same location. + + if ($ref1 == $ref2) { # cheap numeric compare of references + print "refs 1 and 2 refer to the same thing\n"; + } + +Using a reference as a string produces both its referent's type, +including any package blessing as described in L<perlobj>, as well +as the numeric address expressed in hex. The ref() operator returns +just the type of thing the reference is pointing to, without the +address. See L<perlfunc/ref> for details and examples of its use. The bless() operator may be used to associate the object a reference points to with a package functioning as an object class. See L<perlobj>. A typeglob may be dereferenced the same way a reference can, because -the dereference syntax always indicates the kind of reference desired. +the dereference syntax always indicates the type of reference desired. So C<${*foo}> and C<${\$foo}> both indicate the same scalar variable. Here's a trick for interpolating a subroutine call into a string: @@ -421,9 +434,9 @@ chicanery is also useful for arbitrary expressions: We said that references spring into existence as necessary if they are undefined, but we didn't say what happens if a value used as a -reference is already defined, but I<ISN'T> a hard reference. If you -use it as a reference in this case, it'll be treated as a symbolic -reference. That is, the value of the scalar is taken to be the I<NAME> +reference is already defined, but I<isn't> a hard reference. If you +use it as a reference, it'll be treated as a symbolic +reference. That is, the value of the scalar is taken to be the I<name> of a variable, rather than a direct link to a (possibly) anonymous value. 
@@ -439,7 +452,7 @@ People frequently expect it to work like this. So it does. $pack = "THAT"; ${"${pack}::$name"} = 5; # Sets $THAT::foo without eval -This is very powerful, and slightly dangerous, in that it's possible +This is powerful, and slightly dangerous, in that it's possible to intend (with the utmost sincerity) to use a hard reference, and accidentally use a symbolic reference instead. To protect against that, you can say @@ -474,7 +487,7 @@ always have within a string. That is, $push = "pop on "; print "${push}over"; -has always meant to print "pop on over", despite the fact that push is +has always meant to print "pop on over", even though push is a reserved word. This has been generalized to work the same outside of quotes, so that @@ -485,7 +498,7 @@ and even print ${ push } . "over"; will have the same effect. (This would have been a syntax error in -Perl 5.000, though Perl 4 allowed it in the spaceless form.) Note that this +Perl 5.000, though Perl 4 allowed it in the spaceless form.) This construct is I<not> considered to be a symbolic reference when you're using strict refs: @@ -515,16 +528,17 @@ makes it more than a bareword: $array{ +shift } $array{ shift @_ } -The B<-w> switch will warn you if it interprets a reserved word as a string. +The C<use warnings> pragma or the B<-w> switch will warn you if it +interprets a reserved word as a string. But it will no longer warn you about using lowercase words, because the string is effectively quoted. =head2 Pseudo-hashes: Using an array as a hash -WARNING: This section describes an experimental feature. Details may +B<WARNING>: This section describes an experimental feature. Details may change without notice in future versions. -Beginning with release 5.005 of Perl you can use an array reference +Beginning with release 5.005 of Perl, you may use an array reference in some contexts that would normally require a hash reference. 
This allows you to access array elements using symbolic names, as if they were fields in a structure. @@ -533,23 +547,58 @@ For this to work, the array must contain extra information. The first element of the array has to be a hash reference that maps field names to array indices. Here is an example: - $struct = [{foo => 1, bar => 2}, "FOO", "BAR"]; + $struct = [{foo => 1, bar => 2}, "FOO", "BAR"]; - $struct->{foo}; # same as $struct->[1], i.e. "FOO" - $struct->{bar}; # same as $struct->[2], i.e. "BAR" + $struct->{foo}; # same as $struct->[1], i.e. "FOO" + $struct->{bar}; # same as $struct->[2], i.e. "BAR" - keys %$struct; # will return ("foo", "bar") in some order - values %$struct; # will return ("FOO", "BAR") in same some order + keys %$struct; # will return ("foo", "bar") in some order + values %$struct; # will return ("FOO", "BAR") in same some order - while (my($k,$v) = each %$struct) { + while (my($k,$v) = each %$struct) { print "$k => $v\n"; - } + } + +Perl will raise an exception if you try to access nonexistent fields. +To avoid inconsistencies, always use the fields::phash() function +provided by the C<fields> pragma. + + use fields; + $pseudohash = fields::phash(foo => "FOO", bar => "BAR"); + +For better performance, Perl can also do the translation from field +names to array indices at compile time for typed object references. +See L<fields>. + +There are two ways to check for the existence of a key in a +pseudo-hash. The first is to use exists(). This checks to see if the +given field has ever been set. It acts this way to match the behavior +of a regular hash. For instance: + + use fields; + $phash = fields::phash([qw(foo bar pants)], ['FOO']); + $phash->{pants} = undef; + + print exists $phash->{foo}; # true, 'foo' was set in the declaration + print exists $phash->{bar}; # false, 'bar' has not been used. 
+ print exists $phash->{pants}; # true, your 'pants' have been touched + +The second is to use exists() on the hash reference sitting in the +first array element. This checks to see if the given key is a valid +field in the pseudo-hash. + + print exists $phash->[0]{bar}; # true, 'bar' is a valid field + print exists $phash->[0]{shoes};# false, 'shoes' can't be used -Perl will raise an exception if you try to delete keys from a pseudo-hash -or try to access nonexistent fields. For better performance, Perl can also -do the translation from field names to array indices at compile time for -typed object references. See L<fields>. +delete() on a pseudo-hash element only deletes the value corresponding +to the key, not the key itself. To delete the key, you'll have to +explicitly delete it from the first hash element. + print delete $phash->{foo}; # prints $phash->[1], "FOO" + print exists $phash->{foo}; # false + print exists $phash->[0]{foo}; # true, key still exists + print delete $phash->[0]{foo}; # now key is gone + print $phash->{foo}; # runtime exception =head2 Function Templates @@ -564,7 +613,7 @@ that generated HTML font changes for the various colors: print "Be ", red("careful"), "with that ", green("light"); -The red() and green() functions would be very similar. To create these, +The red() and green() functions would be similar. To create these, we'll assign a closure to a typeglob of the name of the function we're trying to build. @@ -598,7 +647,7 @@ above--only works with closures, not general subroutines. In the general case, then, named subroutines do not nest properly, although anonymous ones do. If you are accustomed to using nested subroutines in other programming languages with their own private variables, you'll have to -work at it a bit in Perl. The intuitive coding of this kind of thing +work at it a bit in Perl. The intuitive coding of this type of thing incurs mysterious warnings about ``will not stay shared''. 
For example, this won't work: @@ -646,7 +695,7 @@ The standard Tie::RefHash module provides a convenient workaround to this. =head1 SEE ALSO Besides the obvious documents, source code can be instructive. -Some rather pathological examples of the use of references can be found +Some pathological examples of the use of references can be found in the F<t/op/ref.t> regression test in the Perl source directory. See also L<perldsc> and L<perllol> for how to use references to create diff --git a/contrib/perl5/pod/perlreftut.pod b/contrib/perl5/pod/perlreftut.pod index 09bea59..c8593fb 100644 --- a/contrib/perl5/pod/perlreftut.pod +++ b/contrib/perl5/pod/perlreftut.pod @@ -184,26 +184,26 @@ Using a hash reference is I<exactly> the same: B<Use Rule 2> -C<${$aref}[3]> is too hard to read, so you can write C<$aref-E<gt>[3]> +C<${$aref}[3]> is too hard to read, so you can write C<< $aref->[3] >> instead. C<${$href}{red}> is too hard to read, so you can write -C<$href-E<gt>{red}> instead. +C<< $href->{red} >> instead. Most often, when you have an array or a hash, you want to get or set a single element from it. C<${$aref}[3]> and C<${$href}{'red'}> have too much punctuation, and Perl lets you abbreviate. -If C<$aref> holds a reference to an array, then C<$aref-E<gt>[3]> is +If C<$aref> holds a reference to an array, then C<< $aref->[3] >> is the fourth element of the array. Don't confuse this with C<$aref[3]>, which is the fourth element of a totally different array, one deceptively named C<@aref>. C<$aref> and C<@aref> are unrelated the same way that C<$item> and C<@item> are. -Similarly, C<$href-E<gt>{'red'}> is part of the hash referred to by +Similarly, C<< $href->{'red'} >> is part of the hash referred to by the scalar variable C<$href>, perhaps even one with no name. C<$href{'red'}> is part of the deceptively named C<%href> hash. 
It's -easy to forget to leave out the C<-E<gt>>, and if you do, you'll get +easy to accidentally leave out the C<< -> >>, and if you do, you'll get bizarre results when your program gets array and hash elements out of totally unexpected hashes and arrays that weren't the ones you wanted to use. @@ -228,10 +228,10 @@ another array. C<$a[1]> is one of these references. It refers to an array, the array containing C<(4, 5, 6)>, and because it is a reference to an array, -B<USE RULE 2> says that we can write C<$a[1]-E<gt>[2]> to get the -third element from that array. C<$a[1]-E<gt>[2]> is the 6. -Similarly, C<$a[0]-E<gt>[1]> is the 2. What we have here is like a -two-dimensional array; you can write C<$a[ROW]-E<gt>[COLUMN]> to get +B<USE RULE 2> says that we can write C<< $a[1]->[2] >> to get the +third element from that array. C<< $a[1]->[2] >> is the 6. +Similarly, C<< $a[0]->[1] >> is the 2. What we have here is like a +two-dimensional array; you can write C<< $a[ROW]->[COLUMN] >> to get or set the element in any row and any column of the array. The notation still looks a little cumbersome, so there's one more @@ -241,8 +241,8 @@ abbreviation: In between two B<subscripts>, the arrow is optional. -Instead of C<$a[1]-E<gt>[2]>, we can write C<$a[1][2]>; it means the -same thing. Instead of C<$a[0]-E<gt>[1]>, we can write C<$a[0][1]>; +Instead of C<< $a[1]->[2] >>, we can write C<$a[1][2]>; it means the +same thing. Instead of C<< $a[0]->[1] >>, we can write C<$a[0][1]>; it means the same thing. Now it really looks like two-dimensional arrays! 
diff --git a/contrib/perl5/pod/perlrun.pod b/contrib/perl5/pod/perlrun.pod index 7cb9aed..f1e2c9a 100644 --- a/contrib/perl5/pod/perlrun.pod +++ b/contrib/perl5/pod/perlrun.pod @@ -4,7 +4,7 @@ perlrun - how to execute the Perl interpreter =head1 SYNOPSIS -B<perl> S<[ B<-sTuU> ]> +B<perl> S<[ B<-CsTuUWX> ]> S<[ B<-hv> ] [ B<-V>[:I<configvar>] ]> S<[ B<-cw> ] [ B<-d>[:I<debugger>] ] [ B<-D>[I<number/list>] ]> S<[ B<-pna> ] [ B<-F>I<pattern> ] [ B<-l>[I<octal>] ] [ B<-0>[I<octal>] ]> @@ -17,7 +17,11 @@ B<perl> S<[ B<-sTuU> ]> =head1 DESCRIPTION -Upon startup, Perl looks for your script in one of the following +The normal way to run a Perl program is by making it directly +executable, or else by passing the name of the source file as an +argument on the command line. (An interactive Perl environment +is also possible--see L<perldebug> for details on how to do that.) +Upon startup, Perl looks for your program in one of the following places: =over 4 @@ -35,61 +39,71 @@ way. See L<Location of Perl>.) =item 3. Passed in implicitly via standard input. This works only if there are -no filename arguments--to pass arguments to a STDIN script you -must explicitly specify a "-" for the script name. +no filename arguments--to pass arguments to a STDIN-read program you +must explicitly specify a "-" for the program name. =back With methods 2 and 3, Perl starts parsing the input file from the beginning, unless you've specified a B<-x> switch, in which case it scans for the first line starting with #! and containing the word -"perl", and starts there instead. This is useful for running a script +"perl", and starts there instead. This is useful for running a program embedded in a larger message. (In this case you would indicate the end -of the script using the C<__END__> token.) +of the program using the C<__END__> token.) The #! line is always examined for switches as the line is being parsed. Thus, if you're on a machine that allows only one argument with the #! 
line, or worse, doesn't even recognize the #! line, you still can get consistent switch behavior regardless of how Perl was -invoked, even if B<-x> was used to find the beginning of the script. - -Because many operating systems silently chop off kernel interpretation of -the #! line after 32 characters, some switches may be passed in on the -command line, and some may not; you could even get a "-" without its -letter, if you're not careful. You probably want to make sure that all -your switches fall either before or after that 32 character boundary. -Most switches don't actually care if they're processed redundantly, but -getting a - instead of a complete switch could cause Perl to try to -execute standard input instead of your script. And a partial B<-I> switch +invoked, even if B<-x> was used to find the beginning of the program. + +Because historically some operating systems silently chopped off +kernel interpretation of the #! line after 32 characters, some +switches may be passed in on the command line, and some may not; +you could even get a "-" without its letter, if you're not careful. +You probably want to make sure that all your switches fall either +before or after that 32-character boundary. Most switches don't +actually care if they're processed redundantly, but getting a "-" +instead of a complete switch could cause Perl to try to execute +standard input instead of your program. And a partial B<-I> switch could also cause odd results. -Some switches do care if they are processed twice, for instance combinations -of B<-l> and B<-0>. Either put all the switches after the 32 character -boundary (if applicable), or replace the use of B<-0>I<digits> by -C<BEGIN{ $/ = "\0digits"; }>. +Some switches do care if they are processed twice, for instance +combinations of B<-l> and B<-0>. Either put all the switches after +the 32-character boundary (if applicable), or replace the use of +B<-0>I<digits> by C<BEGIN{ $/ = "\0digits"; }>. Parsing of the #! 
switches starts wherever "perl" is mentioned in the line. The sequences "-*" and "- " are specifically ignored so that you could, if you were so inclined, say #!/bin/sh -- # -*- perl -*- -p - eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' + eval 'exec perl -wS $0 ${1+"$@"}' if $running_under_some_shell; -to let Perl see the B<-p> switch. +to let Perl see the B<-p> switch. + +A similar trick involves the B<env> program, if you have it. + + #!/usr/bin/env perl + +The examples above use a relative path to the perl interpreter, +getting whatever version is first in the user's path. If you want +a specific version of Perl, say, perl5.005_57, you should place +that directly in the #! line's path. If the #! line does not contain the word "perl", the program named after the #! is executed instead of the Perl interpreter. This is slightly bizarre, but it helps people on machines that don't do #!, because they -can tell a program that their SHELL is /usr/bin/perl, and Perl will then +can tell a program that their SHELL is F</usr/bin/perl>, and Perl will then dispatch the program to the correct interpreter for them. -After locating your script, Perl compiles the entire script to an +After locating your program, Perl compiles the entire program to an internal form. If there are any compilation errors, execution of the -script is not attempted. (This is unlike the typical shell script, +program is not attempted. (This is unlike the typical shell script, which might run part-way through before finding a syntax error.) -If the script is syntactically correct, it is executed. If the script +If the program is syntactically correct, it is executed. If the program runs off the end without hitting an exit() or die() operator, an implicit C<exit(0)> is provided to indicate successful completion. 
@@ -105,28 +119,27 @@ Put extproc perl -S -your_switches -as the first line in C<*.cmd> file (C<-S> due to a bug in cmd.exe's +as the first line in C<*.cmd> file (B<-S> due to a bug in cmd.exe's `extproc' handling). =item MS-DOS -Create a batch file to run your script, and codify it in +Create a batch file to run your program, and codify it in C<ALTERNATIVE_SHEBANG> (see the F<dosish.h> file in the source distribution for more information). =item Win95/NT -The Win95/NT installation, when using the Activeware port of Perl, +The Win95/NT installation, when using the ActiveState installer for Perl, will modify the Registry to associate the F<.pl> extension with the perl -interpreter. If you install another port of Perl, including the one -in the Win32 directory of the Perl distribution, then you'll have to -modify the Registry yourself. Note that this means you can no -longer tell the difference between an executable Perl program -and a Perl library file. +interpreter. If you install Perl by other means (including building from +the sources), you may have to modify the Registry yourself. Note that +this means you can no longer tell the difference between an executable +Perl program and a Perl library file. =item Macintosh -Macintosh perl scripts will have the appropriate Creator and +A Macintosh perl program will have the appropriate Creator and Type, so that double-clicking them will invoke the perl application. =item VMS @@ -136,10 +149,10 @@ Put $ perl -mysw 'f$env("procedure")' 'p1' 'p2' 'p3' 'p4' 'p5' 'p6' 'p7' 'p8' ! $ exit++ + ++$status != 0 and $exit = $status = undef; -at the top of your script, where C<-mysw> are any command line switches you -want to pass to Perl. You can now invoke the script directly, by saying -C<perl script>, or as a DCL procedure, by saying C<@script> (or implicitly -via F<DCL$PATH> by just using the name of the script). +at the top of your program, where B<-mysw> are any command line switches you +want to pass to Perl. 
You can now invoke the program directly, by saying +C<perl program>, or as a DCL procedure, by saying C<@program> (or implicitly +via F<DCL$PATH> by just using the name of the program). This incantation is a bit much to remember, but Perl will display it for you if you say C<perl "-V:startperl">. @@ -150,10 +163,10 @@ Command-interpreters on non-Unix systems have rather different ideas on quoting than Unix shells. You'll need to learn the special characters in your command-interpreter (C<*>, C<\> and C<"> are common) and how to protect whitespace and these characters to run -one-liners (see C<-e> below). +one-liners (see B<-e> below). On some systems, you may have to change single-quotes to double ones, -which you must I<NOT> do on Unix or Plan9 systems. You might also +which you must I<not> do on Unix or Plan9 systems. You might also have to change a single % to a %%. For example: @@ -171,13 +184,13 @@ For example: # VMS perl -e "print ""Hello world\n""" -The problem is that none of this is reliable: it depends on the command -and it is entirely possible neither works. If 4DOS was the command shell, this would -probably work better: +The problem is that none of this is reliable: it depends on the +command and it is entirely possible neither works. If B<4DOS> were +the command shell, this would probably work better: perl -e "print <Ctrl-x>"Hello world\n<Ctrl-x>"" -CMD.EXE in Windows NT slipped a lot of standard Unix functionality in +B<CMD.EXE> in Windows NT slipped a lot of standard Unix functionality in when nobody was looking, but just try to find documentation for its quoting rules. @@ -191,22 +204,30 @@ There is no general solution to all of this. It's just a mess. =head2 Location of Perl It may seem obvious to say, but Perl is useful only when users can -easily find it. When possible, it's good for both B</usr/bin/perl> and -B</usr/local/bin/perl> to be symlinks to the actual binary. 
If that -can't be done, system administrators are strongly encouraged to put -(symlinks to) perl and its accompanying utilities, such as perldoc, into -a directory typically found along a user's PATH, or in another obvious -and convenient place. +easily find it. When possible, it's good for both F</usr/bin/perl> +and F</usr/local/bin/perl> to be symlinks to the actual binary. If +that can't be done, system administrators are strongly encouraged +to put (symlinks to) perl and its accompanying utilities into a +directory typically found along a user's PATH, or in some other +obvious and convenient place. + +In this documentation, C<#!/usr/bin/perl> on the first line of the program +will stand in for whatever method works on your system. You are +advised to use a specific path if you care about a specific version. + + #!/usr/local/bin/perl5.00554 -In this documentation, C<#!/usr/bin/perl> on the first line of the script -will stand in for whatever method works on your system. +or if you just want to be running at least a particular version, place a statement +like this at the top of your program: -=head2 Switches + use 5.005_54; -A single-character switch may be combined with the following switch, if -any. +=head2 Command Switches - #!/usr/bin/perl -spi.bak # same as -s -p -i.bak +As with all standard commands, a single-character switch may be +clustered with the following switch, if any. + + #!/usr/bin/perl -spi.orig # same as -s -p -i.orig Switches include: @@ -220,7 +241,7 @@ precede or follow the digits. For example, if you have a version of B<find> which can print filenames terminated by the null character, you can say this: - find . -name '*.bak' -print0 | perl -n0e unlink + find . -name '*.orig' -print0 | perl -n0e unlink The special value 00 will cause Perl to slurp files in paragraph mode. The value 0777 will cause Perl to slurp files whole because there is no @@ -243,28 +264,37 @@ is equivalent to An alternate delimiter may be specified using B<-F>. 
+=item B<-C> + +enables Perl to use the native wide character APIs on the target system. +The magic variable C<${^WIDE_SYSTEM_CALLS}> reflects the state of +this switch. See L<perlvar/"${^WIDE_SYSTEM_CALLS}">. + +This feature is currently only implemented on the Win32 platform. + =item B<-c> -causes Perl to check the syntax of the script and then exit without -executing it. Actually, it I<will> execute C<BEGIN>, C<END>, and C<use> blocks, -because these are considered as occurring outside the execution of -your program. +causes Perl to check the syntax of the program and then exit without +executing it. Actually, it I<will> execute C<BEGIN>, C<CHECK>, and +C<use> blocks, because these are considered as occurring outside the +execution of your program. C<INIT> and C<END> blocks, however, will +be skipped. =item B<-d> -runs the script under the Perl debugger. See L<perldebug>. +runs the program under the Perl debugger. See L<perldebug>. =item B<-d:>I<foo> -runs the script under the control of a debugging or tracing module -installed as Devel::foo. E.g., B<-d:DProf> executes the script using the -Devel::DProf profiler. See L<perldebug>. +runs the program under the control of a debugging, profiling, or +tracing module installed as Devel::foo. E.g., B<-d:DProf> executes +the program using the Devel::DProf profiler. See L<perldebug>. =item B<-D>I<letters> =item B<-D>I<number> -sets debugging flags. To watch how it executes your script, use +sets debugging flags. To watch how it executes your program, use B<-Dtls>. (This works only if debugging is compiled into your Perl.) Another nice value is B<-Dx>, which lists your compiled syntax tree. And B<-Dr> displays compiled regular expressions. 
As an @@ -283,24 +313,35 @@ equivalent to B<-Dtls>): 512 r Regular expression parsing and execution 1024 x Syntax tree dump 2048 u Tainting checks - 4096 L Memory leaks (needs C<-DLEAKTEST> when compiling Perl) + 4096 L Memory leaks (needs -DLEAKTEST when compiling Perl) 8192 H Hash dump -- usurps values() 16384 X Scratchpad allocation 32768 D Cleaning up 65536 S Thread synchronization -All these flags require C<-DDEBUGGING> when you compile the Perl -executable. This flag is automatically set if you include C<-g> +All these flags require B<-DDEBUGGING> when you compile the Perl +executable. See the F<INSTALL> file in the Perl source distribution +for how to do this. This flag is automatically set if you include B<-g> option when C<Configure> asks you about optimizer/debugger flags. +If you're just trying to get a print out of each line of Perl code +as it executes, the way that C<sh -x> provides for shell scripts, +you can't use Perl's B<-D> switch. Instead do this + + # Bourne shell syntax + $ PERLDB_OPTS="NonStop=1 AutoTrace=1 frame=2" perl -dS program + + # csh syntax + % (setenv PERLDB_OPTS "NonStop=1 AutoTrace=1 frame=2"; perl -dS program) + +See L<perldebug> for details and variations. + =item B<-e> I<commandline> -may be used to enter one line of script. -If B<-e> is given, Perl -will not look for a script filename in the argument list. -Multiple B<-e> commands may -be given to build up a multi-line script. -Make sure to use semicolons where you would in a normal program. +may be used to enter one line of program. If B<-e> is given, Perl +will not look for a filename in the argument list. Multiple B<-e> +commands may be given to build up a multi-line script. Make sure +to use semicolons where you would in a normal program. =item B<-F>I<pattern> @@ -324,47 +365,46 @@ rules: If no extension is supplied, no backup is made and the current file is overwritten. 
-If the extension doesn't contain a C<*> then it is appended to the end -of the current filename as a suffix. - -If the extension does contain one or more C<*> characters, then each C<*> -is replaced with the current filename. In perl terms you could think of -this as: +If the extension doesn't contain a C<*>, then it is appended to the +end of the current filename as a suffix. If the extension does +contain one or more C<*> characters, then each C<*> is replaced +with the current filename. In Perl terms, you could think of this +as: ($backup = $extension) =~ s/\*/$file_name/g; This allows you to add a prefix to the backup file, instead of (or in addition to) a suffix: - $ perl -pi'bak_*' -e 's/bar/baz/' fileA # backup to 'bak_fileA' + $ perl -pi'orig_*' -e 's/bar/baz/' fileA # backup to 'orig_fileA' Or even to place backup copies of the original files into another directory (provided the directory already exists): - $ perl -pi'old/*.bak' -e 's/bar/baz/' fileA # backup to 'old/fileA.bak' + $ perl -pi'old/*.orig' -e 's/bar/baz/' fileA # backup to 'old/fileA.orig' These sets of one-liners are equivalent: $ perl -pi -e 's/bar/baz/' fileA # overwrite current file - $ perl -pi'*' -e 's/bar/baz/' fileA # overwrite current file + $ perl -pi'*' -e 's/bar/baz/' fileA # overwrite current file - $ perl -pi'.bak' -e 's/bar/baz/' fileA # backup to 'fileA.bak' - $ perl -pi'*.bak' -e 's/bar/baz/' fileA # backup to 'fileA.bak' + $ perl -pi'.orig' -e 's/bar/baz/' fileA # backup to 'fileA.orig' + $ perl -pi'*.orig' -e 's/bar/baz/' fileA # backup to 'fileA.orig' From the shell, saying - $ perl -p -i.bak -e "s/foo/bar/; ... " + $ perl -p -i.orig -e "s/foo/bar/; ... 
" -is the same as using the script: +is the same as using the program: - #!/usr/bin/perl -pi.bak + #!/usr/bin/perl -pi.orig s/foo/bar/; which is equivalent to #!/usr/bin/perl - $extension = '.bak'; - while (<>) { + $extension = '.orig'; + LINE: while (<>) { if ($ARGV ne $oldargv) { if ($extension !~ /\*/) { $backup = $ARGV . $extension; @@ -392,9 +432,9 @@ output filehandle after the loop. As shown above, Perl creates the backup file whether or not any output is actually changed. So this is just a fancy way to copy files: - $ perl -p -i'/some/file/path/*' -e 1 file1 file2 file3... - or - $ perl -p -i'.bak' -e 1 file1 file2 file3... + $ perl -p -i'/some/file/path/*' -e 1 file1 file2 file3... +or + $ perl -p -i'.orig' -e 1 file1 file2 file3... You can use C<eof> without parentheses to locate the end of each input file, in case you want to append to each file, or reset line numbering @@ -404,15 +444,19 @@ If, for a given file, Perl is unable to create the backup file as specified in the extension then it will skip that file and continue on with the next one (if it exists). -For a discussion of issues surrounding file permissions and C<-i>, see -L<perlfaq5/Why does Perl let me delete read-only files? Why does -i clobber protected files? Isn't this a bug in Perl?>. +For a discussion of issues surrounding file permissions and B<-i>, +see L<perlfaq5/Why does Perl let me delete read-only files? Why +does -i clobber protected files? Isn't this a bug in Perl?>. You cannot use B<-i> to create directories or to strip extensions from files. -Perl does not expand C<~>, so don't do that. +Perl does not expand C<~> in filenames, which is good, since some +folks use it for their backup files: -Finally, note that the B<-i> switch does not impede execution when no + $ perl -pi~ -e 's/foo/bar/' file1 file2 file3... + +Finally, the B<-i> switch does not impede execution when no files are given on the command line. 
In this case, no backup is made (the original file cannot, of course, be determined) and processing proceeds from STDIN to STDOUT as might be expected. @@ -426,13 +470,13 @@ searches /usr/include and /usr/lib/perl. =item B<-l>[I<octnum>] -enables automatic line-ending processing. It has two effects: first, -it automatically chomps "C<$/>" (the input record separator) when used -with B<-n> or B<-p>, and second, it assigns "C<$\>" -(the output record separator) to have the value of I<octnum> so that -any print statements will have that separator added back on. If -I<octnum> is omitted, sets "C<$\>" to the current value of "C<$/>". For -instance, to trim lines to 80 columns: +enables automatic line-ending processing. It has two separate +effects. First, it automatically chomps C<$/> (the input record +separator) when used with B<-n> or B<-p>. Second, it assigns C<$\> +(the output record separator) to have the value of I<octnum> so +that any print statements will have that separator added back on. +If I<octnum> is omitted, sets C<$\> to the current value of +C<$/>. For instance, to trim lines to 80 columns: perl -lpe 'substr($_, 80) = ""' @@ -452,55 +496,59 @@ This sets C<$\> to newline and then sets C<$/> to the null character. =item B<-[mM]>[B<->]I<module=arg[,arg]...> -C<-m>I<module> executes C<use> I<module> C<();> before executing your -script. +B<-m>I<module> executes C<use> I<module> C<();> before executing your +program. -C<-M>I<module> executes C<use> I<module> C<;> before executing your -script. You can use quotes to add extra code after the module name, -e.g., C<-M'module qw(foo bar)'>. +B<-M>I<module> executes C<use> I<module> C<;> before executing your +program. You can use quotes to add extra code after the module name, +e.g., C<'-Mmodule qw(foo bar)'>. -If the first character after the C<-M> or C<-m> is a dash (C<->) +If the first character after the B<-M> or B<-m> is a dash (C<->) then the 'use' is replaced with 'no'. 
A little builtin syntactic sugar means you can also say -C<-mmodule=foo,bar> or C<-Mmodule=foo,bar> as a shortcut for -C<-M'module qw(foo bar)'>. This avoids the need to use quotes when -importing symbols. The actual code generated by C<-Mmodule=foo,bar> is +B<-mmodule=foo,bar> or B<-Mmodule=foo,bar> as a shortcut for +C<'-Mmodule qw(foo bar)'>. This avoids the need to use quotes when +importing symbols. The actual code generated by B<-Mmodule=foo,bar> is C<use module split(/,/,q{foo,bar})>. Note that the C<=> form -removes the distinction between C<-m> and C<-M>. +removes the distinction between B<-m> and B<-M>. =item B<-n> -causes Perl to assume the following loop around your script, which +causes Perl to assume the following loop around your program, which makes it iterate over filename arguments somewhat like B<sed -n> or B<awk>: + LINE: while (<>) { - ... # your script goes here + ... # your program goes here } Note that the lines are not printed by default. See B<-p> to have lines printed. If a file named by an argument cannot be opened for -some reason, Perl warns you about it, and moves on to the next file. +some reason, Perl warns you about it and moves on to the next file. Here is an efficient way to delete all files older than a week: - find . -mtime +7 -print | perl -nle 'unlink;' + find . -mtime +7 -print | perl -nle unlink -This is faster than using the C<-exec> switch of B<find> because you don't -have to start a process on every filename found. +This is faster than using the B<-exec> switch of B<find> because you don't +have to start a process on every filename found. It does suffer from +the bug of mishandling newlines in pathnames, which you can fix if +you follow the example under B<-0>. C<BEGIN> and C<END> blocks may be used to capture control before or after -the implicit loop, just as in B<awk>. +the implicit program loop, just as in B<awk>. 
=item B<-p> -causes Perl to assume the following loop around your script, which +causes Perl to assume the following loop around your program, which makes it iterate over filename arguments somewhat like B<sed>: + LINE: while (<>) { - ... # your script goes here + ... # your program goes here } continue { print or die "-p destination: $!\n"; } @@ -512,30 +560,32 @@ treated as fatal. To suppress printing use the B<-n> switch. A B<-p> overrides a B<-n> switch. C<BEGIN> and C<END> blocks may be used to capture control before or after -the implicit loop, just as in awk. +the implicit loop, just as in B<awk>. =item B<-P> -causes your script to be run through the C preprocessor before -compilation by Perl. (Because both comments and cpp directives begin +causes your program to be run through the C preprocessor before +compilation by Perl. (Because both comments and B<cpp> directives begin with the # character, you should avoid starting comments with any words recognized by the C preprocessor such as "if", "else", or "define".) =item B<-s> -enables some rudimentary switch parsing for switches on the command -line after the script name but before any filename arguments (or before +enables rudimentary switch parsing for switches on the command +line after the program name but before any filename arguments (or before a B<-->). Any switch found there is removed from @ARGV and sets the -corresponding variable in the Perl script. The following script -prints "true" if and only if the script is invoked with a B<-xyz> switch. +corresponding variable in the Perl program. The following program +prints "1" if the program is invoked with a B<-xyz> switch, and "abc" +if it is invoked with B<-xyz=abc>. #!/usr/bin/perl -s - if ($xyz) { print "true\n"; } + if ($xyz) { print "$xyz\n" } =item B<-S> makes Perl use the PATH environment variable to search for the -script (unless the name of the script contains directory separators). 
+program (unless the name of the program contains directory separators). + On some platforms, this also makes Perl append suffixes to the filename while searching for it. For example, on Win32 platforms, the ".bat" and ".cmd" suffixes are appended if a lookup for the @@ -543,16 +593,6 @@ original name fails, and if the name does not already end in one of those suffixes. If your Perl was compiled with DEBUGGING turned on, using the -Dp switch to Perl shows how the search progresses. -If the filename supplied contains directory separators (i.e. it is an -absolute or relative pathname), and if the file is not found, -platforms that append file extensions will do so and try to look -for the file with those extensions added, one by one. - -On DOS-like platforms, if the script does not contain directory -separators, it will first be searched for in the current directory -before being searched for on the PATH. On Unix platforms, the -script will be searched for strictly on the PATH. - Typically this is used to emulate #! startup on platforms that don't support #!. This example works on many platforms that have a shell compatible with Bourne shell: @@ -561,94 +601,130 @@ have a shell compatible with Bourne shell: eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' if $running_under_some_shell; -The system ignores the first line and feeds the script to /bin/sh, -which proceeds to try to execute the Perl script as a shell script. +The system ignores the first line and feeds the program to F</bin/sh>, +which proceeds to try to execute the Perl program as a shell script. The shell executes the second line as a normal shell command, and thus starts up the Perl interpreter. On some systems $0 doesn't always contain the full pathname, so the B<-S> tells Perl to search for the -script if necessary. After Perl locates the script, it parses the +program if necessary. 
After Perl locates the program, it parses the lines and ignores them because the variable $running_under_some_shell -is never true. If the script will be interpreted by csh, you will need +is never true. If the program will be interpreted by csh, you will need to replace C<${1+"$@"}> with C<$*>, even though that doesn't understand embedded spaces (and such) in the argument list. To start up sh rather than csh, some systems may have to replace the #! line with a line containing just a colon, which will be politely ignored by Perl. Other systems can't control that, and need a totally devious construct that -will work under any of csh, sh, or Perl, such as the following: +will work under any of B<csh>, B<sh>, or Perl, such as the following: - eval '(exit $?0)' && eval 'exec /usr/bin/perl -wS $0 ${1+"$@"}' + eval '(exit $?0)' && eval 'exec perl -wS $0 ${1+"$@"}' & eval 'exec /usr/bin/perl -wS $0 $argv:q' if $running_under_some_shell; +If the filename supplied contains directory separators (i.e., is an +absolute or relative pathname), and if that file is not found, +platforms that append file extensions will do so and try to look +for the file with those extensions added, one by one. + +On DOS-like platforms, if the program does not contain directory +separators, it will first be searched for in the current directory +before being searched for on the PATH. On Unix platforms, the +program will be searched for strictly on the PATH. + =item B<-T> forces "taint" checks to be turned on so you can test them. Ordinarily -these checks are done only when running setuid or setgid. It's a good -idea to turn them on explicitly for programs run on another's behalf, -such as CGI programs. See L<perlsec>. Note that (for security reasons) -this option must be seen by Perl quite early; usually this means it must -appear early on the command line or in the #! line (for systems which -support that). +these checks are done only when running setuid or setgid. 
It's a +good idea to turn them on explicitly for programs that run on behalf +of someone else whom you might not necessarily trust, such as CGI +programs or any internet servers you might write in Perl. See +L<perlsec> for details. For security reasons, this option must be +seen by Perl quite early; usually this means it must appear early +on the command line or in the #! line for systems which support +that construct. =item B<-u> -causes Perl to dump core after compiling your script. You can then -in theory take this core dump and turn it into an executable file by using the -B<undump> program (not supplied). This speeds startup at the expense of -some disk space (which you can minimize by stripping the executable). -(Still, a "hello world" executable comes out to about 200K on my -machine.) If you want to execute a portion of your script before dumping, -use the dump() operator instead. Note: availability of B<undump> is -platform specific and may not be available for a specific port of -Perl. It has been superseded by the new perl-to-C compiler, which is more -portable, even though it's still only considered beta. +This obsolete switch causes Perl to dump core after compiling your +program. You can then in theory take this core dump and turn it +into an executable file by using the B<undump> program (not supplied). +This speeds startup at the expense of some disk space (which you +can minimize by stripping the executable). (Still, a "hello world" +executable comes out to about 200K on my machine.) If you want to +execute a portion of your program before dumping, use the dump() +operator instead. Note: availability of B<undump> is platform +specific and may not be available for a specific port of Perl. + +This switch has been superseded in favor of the new Perl code +generator backends to the compiler. See L<B> and L<B::Bytecode> +for details. =item B<-U> allows Perl to do unsafe operations. 
Currently the only "unsafe" operations are the unlinking of directories while running as superuser, and running setuid programs with fatal taint checks turned into -warnings. Note that the B<-w> switch (or the C<$^W> variable) must -be used along with this option to actually B<generate> the +warnings. Note that the B<-w> switch (or the C<$^W> variable) must +be used along with this option to actually I<generate> the taint-check warnings. =item B<-v> -prints the version and patchlevel of your Perl executable. +prints the version and patchlevel of your perl executable. =item B<-V> prints summary of the major perl configuration values and the current -value of @INC. +values of @INC. =item B<-V:>I<name> Prints to STDOUT the value of the named configuration variable. +For example, + + $ perl -V:man.dir + +will provide strong clues about what your MANPATH variable should +be set to in order to access the Perl documentation. =item B<-w> -prints warnings about variable names that are mentioned only once, and -scalar variables that are used before being set. Also warns about -redefined subroutines, and references to undefined filehandles or -filehandles opened read-only that you are attempting to write on. Also -warns you if you use values as a number that doesn't look like numbers, -using an array as though it were a scalar, if your subroutines recurse -more than 100 deep, and innumerable other things. +prints warnings about dubious constructs, such as variable names +that are mentioned only once and scalar variables that are used +before being set, redefined subroutines, references to undefined +filehandles or filehandles opened read-only that you are attempting +to write on, values used as a number that doesn't look like numbers, +using an array as though it were a scalar, if your subroutines +recurse more than 100 deep, and innumerable other things. + +This switch really just enables the internal C<$^W> variable. 
You +can disable or promote into fatal errors specific warnings using +C<__WARN__> hooks, as described in L<perlvar> and L<perlfunc/warn>. +See also L<perldiag> and L<perltrap>. A new, fine-grained warning +facility is also available if you want to manipulate entire classes +of warnings; see L<warnings> or L<perllexwarn>. -You can disable specific warnings using C<__WARN__> hooks, as described -in L<perlvar> and L<perlfunc/warn>. See also L<perldiag> and L<perltrap>. +=item B<-W> + +Enables all warnings regardless of C<no warnings> or C<$^W>. +See L<perllexwarn>. + +=item B<-X> + +Disables all warnings regardless of C<use warnings> or C<$^W>. +See L<perllexwarn>. =item B<-x> I<directory> -tells Perl that the script is embedded in a message. Leading -garbage will be discarded until the first line that starts with #! and -contains the string "perl". Any meaningful switches on that line will -be applied. If a directory name is specified, Perl will switch to -that directory before running the script. The B<-x> switch controls -only the disposal of leading garbage. The script must be -terminated with C<__END__> if there is trailing garbage to be ignored (the -script can process any or all of the trailing garbage via the DATA -filehandle if desired). +tells Perl that the program is embedded in a larger chunk of unrelated +ASCII text, such as in a mail message. Leading garbage will be +discarded until the first line that starts with #! and contains the +string "perl". Any meaningful switches on that line will be applied. +If a directory name is specified, Perl will switch to that directory +before running the program. The B<-x> switch controls only the +disposal of leading garbage. The program must be terminated with +C<__END__> if there is trailing garbage to be ignored (the program +can process any or all of the trailing garbage via the DATA filehandle +if desired). =back @@ -666,17 +742,20 @@ Used if chdir has no argument and HOME is not set. 
=item PATH -Used in executing subprocesses, and in finding the script if B<-S> is +Used in executing subprocesses, and in finding the program if B<-S> is used. =item PERL5LIB A colon-separated list of directories in which to look for Perl library files before looking in the standard library and the current -directory. If PERL5LIB is not defined, PERLLIB is used. When running -taint checks (because the script was running setuid or setgid, or the -B<-T> switch was used), neither variable is used. The script should -instead say +directory. Any architecture-specific directories under the specified +locations are automatically included if they exist. If PERL5LIB is not +defined, PERLLIB is used. + +When running taint checks (either because the program was running setuid +or setgid, or the B<-T> switch was used), neither variable is used. +The program should instead say: use lib "/my/directory"; @@ -684,7 +763,7 @@ instead say Command-line options (switches). Switches in this variable are taken as if they were on every Perl command line. Only the B<-[DIMUdmw]> -switches are allowed. When running taint checks (because the script +switches are allowed. When running taint checks (because the program was running setuid or setgid, or the B<-T> switch was used), this variable is ignored. If PERL5OPT begins with B<-T>, tainting will be enabled, and any subsequent options ignored. @@ -701,12 +780,12 @@ The command used to load the debugger code. The default is: BEGIN { require 'perl5db.pl' } -=item PERL5SHELL (specific to WIN32 port) +=item PERL5SHELL (specific to the Win32 port) May be set to an alternative shell that perl must use internally for executing "backtick" commands or system(). Default is C<cmd.exe /x/c> on WindowsNT and C<command.com /c> on Windows95. The value is considered -to be space delimited. Precede any character that needs to be protected +to be space-separated. Precede any character that needs to be protected (like a space or backslash) with a backslash. 
Note that Perl doesn't use COMSPEC for this purpose because @@ -736,12 +815,11 @@ Perl also has environment variables that control how Perl handles data specific to particular natural languages. See L<perllocale>. Apart from these, Perl uses no other environment variables, except -to make them available to the script being executed, and to child -processes. However, scripts running setuid would do well to execute +to make them available to the program being executed, and to child +processes. However, programs running setuid would do well to execute the following lines before doing anything else, just to keep people honest: - $ENV{PATH} = '/bin:/usr/bin'; # or whatever you need + $ENV{PATH} = '/bin:/usr/bin'; # or whatever you need $ENV{SHELL} = '/bin/sh' if exists $ENV{SHELL}; delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; - diff --git a/contrib/perl5/pod/perlsec.pod b/contrib/perl5/pod/perlsec.pod index 0b22acd..4185e84 100644 --- a/contrib/perl5/pod/perlsec.pod +++ b/contrib/perl5/pod/perlsec.pod @@ -32,17 +32,19 @@ program more secure than the corresponding C program. You may not use data derived from outside your program to affect something else outside your program--at least, not by accident. All command line arguments, environment variables, locale information (see -L<perllocale>), results of certain system calls (readdir, readlink, -the gecos field of getpw* calls), and all file input are marked as -"tainted". Tainted data may not be used directly or indirectly in any -command that invokes a sub-shell, nor in any command that modifies -files, directories, or processes. (B<Important exception>: If you pass -a list of arguments to either C<system> or C<exec>, the elements of -that list are B<NOT> checked for taintedness.) Any variable set -to a value derived from tainted data will itself be tainted, -even if it is logically impossible for the tainted data -to alter the variable. 
Because taintedness is associated with each -scalar value, some elements of an array can be tainted and others not. +L<perllocale>), results of certain system calls (readdir(), +readlink(), the variable of shmread(), the messages returned by +msgrcv(), the password, gcos and shell fields returned by the +getpwxxx() calls), and all file input are marked as "tainted". +Tainted data may not be used directly or indirectly in any command +that invokes a sub-shell, nor in any command that modifies files, +directories, or processes. (B<Important exception>: If you pass a list +of arguments to either C<system> or C<exec>, the elements of that list +are B<NOT> checked for taintedness.) Any variable set to a value +derived from tainted data will itself be tainted, even if it is +logically impossible for the tainted data to alter the variable. +Because taintedness is associated with each scalar value, some +elements of an array can be tainted and others not. For example: @@ -84,8 +86,8 @@ For example: exec "echo", $arg; # Secure (doesn't use the shell) exec "sh", '-c', $arg; # Considered secure, alas! - @files = <*.c>; # Always insecure (uses csh) - @files = glob('*.c'); # Always insecure (uses csh) + @files = <*.c>; # insecure (uses readdir() or similar) + @files = glob('*.c'); # insecure (uses readdir() or similar) If you try to do something insecure, you will get a fatal error saying something like "Insecure dependency" or "Insecure $ENV{PATH}". Note that you @@ -139,7 +141,7 @@ metacharacters, nor are dot, dash, or at going to mean something special to the shell. Use of C</.+/> would have been insecure in theory because it lets everything through, but Perl doesn't check for that. The lesson is that when untainting, you must be exceedingly careful with your patterns. 
-Laundering data using regular expression is the I<ONLY> mechanism for +Laundering data using a regular expression is the I<only> mechanism for untainting dirty data, unless you use the strategy detailed below to fork a child of lesser privilege. diff --git a/contrib/perl5/pod/perlstyle.pod b/contrib/perl5/pod/perlstyle.pod index 04aab98..bfe5b76 100644 --- a/contrib/perl5/pod/perlstyle.pod +++ b/contrib/perl5/pod/perlstyle.pod @@ -10,7 +10,8 @@ make your programs easier to read, understand, and maintain. The most important thing is to run your programs under the B<-w> flag at all times. You may turn it off explicitly for particular -portions of code via the C<$^W> variable if you must. You should +portions of code via the C<use warnings> pragma or the C<$^W> variable +if you must. You should also always run under C<use strict> or know the reason why not. The C<use sigtrap> and even C<use diagnostics> pragmas may also prove useful. @@ -260,7 +261,8 @@ Line up your transliterations when it makes sense: Think about reusability. Why waste brainpower on a one-shot when you might want to do something like it again? Consider generalizing your code. Consider writing a module or object class. Consider making your -code run cleanly with C<use strict> and B<-w> in effect. Consider giving away +code run cleanly with C<use strict> and C<use warnings> (or B<-w>) in effect. +Consider giving away your code. Consider changing your whole world view. Consider... oh, never mind. diff --git a/contrib/perl5/pod/perlsub.pod b/contrib/perl5/pod/perlsub.pod index bfab0fe..46d1a2a 100644 --- a/contrib/perl5/pod/perlsub.pod +++ b/contrib/perl5/pod/perlsub.pod @@ -6,35 +6,42 @@ perlsub - Perl subroutines To declare subroutines: - sub NAME; # A "forward" declaration. - sub NAME(PROTO); # ditto, but with prototypes + sub NAME; # A "forward" declaration.
+ sub NAME(PROTO); # ditto, but with prototypes + sub NAME : ATTRS; # with attributes + sub NAME(PROTO) : ATTRS; # with attributes and prototypes - sub NAME BLOCK # A declaration and a definition. - sub NAME(PROTO) BLOCK # ditto, but with prototypes + sub NAME BLOCK # A declaration and a definition. + sub NAME(PROTO) BLOCK # ditto, but with prototypes + sub NAME : ATTRS BLOCK # with attributes + sub NAME(PROTO) : ATTRS BLOCK # with prototypes and attributes To define an anonymous subroutine at runtime: - $subref = sub BLOCK; # no proto - $subref = sub (PROTO) BLOCK; # with proto + $subref = sub BLOCK; # no proto + $subref = sub (PROTO) BLOCK; # with proto + $subref = sub : ATTRS BLOCK; # with attributes + $subref = sub (PROTO) : ATTRS BLOCK; # with proto and attributes To import subroutines: - use PACKAGE qw(NAME1 NAME2 NAME3); + use MODULE qw(NAME1 NAME2 NAME3); To call subroutines: NAME(LIST); # & is optional with parentheses. NAME LIST; # Parentheses optional if predeclared/imported. + &NAME(LIST); # Circumvent prototypes. &NAME; # Makes current @_ visible to called subroutine. =head1 DESCRIPTION -Like many languages, Perl provides for user-defined subroutines. These -may be located anywhere in the main program, loaded in from other files -via the C<do>, C<require>, or C<use> keywords, or even generated on the -fly using C<eval> or anonymous subroutines (closures). You can even call -a function indirectly using a variable containing its name or a CODE reference -to it. +Like many languages, Perl provides for user-defined subroutines. +These may be located anywhere in the main program, loaded in from +other files via the C<do>, C<require>, or C<use> keywords, or +generated on the fly using C<eval> or anonymous subroutines (closures). +You can even call a function indirectly using a variable containing +its name or a CODE reference. 
The Perl model for function call and return values is simple: all functions are passed as parameters one single flat list of scalars, and @@ -44,37 +51,38 @@ collapse, losing their identities--but you may always use pass-by-reference instead to avoid this. Both call and return lists may contain as many or as few scalar elements as you'd like. (Often a function without an explicit return statement is called a subroutine, but -there's really no difference from the language's perspective.) - -Any arguments passed to the routine come in as the array C<@_>. Thus if you -called a function with two arguments, those would be stored in C<$_[0]> -and C<$_[1]>. The array C<@_> is a local array, but its elements are -aliases for the actual scalar parameters. In particular, if an element -C<$_[0]> is updated, the corresponding argument is updated (or an error -occurs if it is not updatable). If an argument is an array or hash -element which did not exist when the function was called, that element is -created only when (and if) it is modified or if a reference to it is -taken. (Some earlier versions of Perl created the element whether or not -it was assigned to.) Note that assigning to the whole array C<@_> removes -the aliasing, and does not update any arguments. - -The return value of the subroutine is the value of the last expression -evaluated. Alternatively, a C<return> statement may be used to exit the +there's really no difference from Perl's perspective.) + +Any arguments passed in show up in the array C<@_>. Therefore, if +you called a function with two arguments, those would be stored in +C<$_[0]> and C<$_[1]>. The array C<@_> is a local array, but its +elements are aliases for the actual scalar parameters. In particular, +if an element C<$_[0]> is updated, the corresponding argument is +updated (or an error occurs if it is not updatable). 
If an argument +is an array or hash element which did not exist when the function +was called, that element is created only when (and if) it is modified +or a reference to it is taken. (Some earlier versions of Perl +created the element whether or not the element was assigned to.) +Assigning to the whole array C<@_> removes that aliasing, and does +not update any arguments. + +The return value of a subroutine is the value of the last expression +evaluated. More explicitly, a C<return> statement may be used to exit the subroutine, optionally specifying the returned value, which will be evaluated in the appropriate context (list, scalar, or void) depending on the context of the subroutine call. If you specify no return value, -the subroutine will return an empty list in a list context, an undefined -value in a scalar context, or nothing in a void context. If you return -one or more arrays and/or hashes, these will be flattened together into -one large indistinguishable list. - -Perl does not have named formal parameters, but in practice all you do is -assign to a C<my()> list of these. Any variables you use in the function -that aren't declared private are global variables. For the gory details -on creating private variables, see -L<"Private Variables via my()"> and L<"Temporary Values via local()">. -To create protected environments for a set of functions in a separate -package (and probably a separate file), see L<perlmod/"Packages">. +the subroutine returns an empty list in list context, the undefined +value in scalar context, or nothing in void context. If you return +one or more aggregates (arrays and hashes), these will be flattened +together into one large indistinguishable list. + +Perl does not have named formal parameters. In practice all you +do is assign to a C<my()> list of these. Variables that aren't +declared to be private are global variables. 
For gory details +on creating private variables, see L<"Private Variables via my()"> +and L<"Temporary Values via local()">. To create protected +environments for a set of functions in a separate package (and +probably a separate file), see L<perlmod/"Packages">. Example: @@ -93,7 +101,7 @@ Example: # that start with whitespace sub get_line { - $thisline = $lookahead; # GLOBAL VARIABLES!! + $thisline = $lookahead; # global variables! LINE: while (defined($lookahead = <STDIN>)) { if ($lookahead =~ /^[ \t]/) { $thisline .= $lookahead; @@ -102,24 +110,25 @@ Example: last LINE; } } - $thisline; + return $thisline; } $lookahead = <STDIN>; # get first line - while ($_ = get_line()) { + while (defined($line = get_line())) { ... } -Use array assignment to a local list to name your formal arguments: +Assigning to a list of private variables to name your arguments: sub maybeset { my($key, $value) = @_; $Foo{$key} = $value unless $Foo{$key}; } -This also has the effect of turning call-by-reference into call-by-value, -because the assignment copies the values. Otherwise a function is free to -do in-place modifications of C<@_> and change its caller's values. +Because the assignment copies the values, this also has the effect +of turning call-by-reference into call-by-value. Otherwise a +function is free to do in-place modifications of C<@_> and change +its caller's values. upcase_in($v1, $v2); # this changes $v1 and $v2 sub upcase_in { @@ -136,7 +145,7 @@ It would be much safer if the C<upcase_in()> function were written to return a copy of its parameters instead of changing them in place: - ($v3, $v4) = upcase($v1, $v2); # this doesn't + ($v3, $v4) = upcase($v1, $v2); # this doesn't change $v1 and $v2 sub upcase { return unless defined wantarray; # void context, do nothing my @parms = @_; @@ -144,12 +153,12 @@ of changing them in place: return wantarray ? @parms : $parms[0]; } -Notice how this (unprototyped) function doesn't care whether it was passed -real scalars or arrays. 
Perl will see everything as one big long flat C<@_> -parameter list. This is one of the ways where Perl's simple -argument-passing style shines. The C<upcase()> function would work perfectly -well without changing the C<upcase()> definition even if we fed it things -like this: +Notice how this (unprototyped) function doesn't care whether it was +passed real scalars or arrays. Perl sees all arguments as one big, +long, flat parameter list in C<@_>. This is one area where +Perl's simple argument-passing style shines. The C<upcase()> +function would work perfectly well without changing the C<upcase()> +definition even if we fed it things like this: @newlist = upcase(@list1, @list2); @newlist = upcase( split /:/, $var ); @@ -158,24 +167,26 @@ Do not, however, be tempted to do this: (@a, @b) = upcase(@list1, @list2); -Because like its flat incoming parameter list, the return list is also -flat. So all you have managed to do here is stored everything in C<@a> and -made C<@b> an empty list. See L<Pass by Reference> for alternatives. - -A subroutine may be called using the "C<&>" prefix. The "C<&>" is optional -in modern Perls, and so are the parentheses if the subroutine has been -predeclared. (Note, however, that the "C<&>" is I<NOT> optional when -you're just naming the subroutine, such as when it's used as an -argument to C<defined()> or C<undef()>. Nor is it optional when you want to -do an indirect subroutine call with a subroutine name or reference -using the C<&$subref()> or C<&{$subref}()> constructs. See L<perlref> -for more on that.) - -Subroutines may be called recursively. If a subroutine is called using -the "C<&>" form, the argument list is optional, and if omitted, no C<@_> array is -set up for the subroutine: the C<@_> array at the time of the call is -visible to subroutine instead. This is an efficiency mechanism that -new users may wish to avoid. +Like the flattened incoming parameter list, the return list is also +flattened on return.
So all you have managed to do here is store +everything in C<@a> and make C<@b> an empty list. See L<Pass by +Reference> for alternatives. + +A subroutine may be called using an explicit C<&> prefix. The +C<&> is optional in modern Perl, as are parentheses if the +subroutine has been predeclared. The C<&> is I<not> optional +when just naming the subroutine, such as when it's used as +an argument to defined() or undef(). Nor is it optional when you +want to do an indirect subroutine call with a subroutine name or +reference using the C<&$subref()> or C<&{$subref}()> constructs, +although the C<< $subref->() >> notation solves that problem. +See L<perlref> for more about all that. + +Subroutines may be called recursively. If a subroutine is called +using the C<&> form, the argument list is optional, and if omitted, +no C<@_> array is set up for the subroutine: the C<@_> array at the +time of the call is visible to the subroutine instead. This is an +efficiency mechanism that new users may wish to avoid. &foo(1,2,3); # pass three arguments foo(1,2,3); # the same @@ -186,18 +197,18 @@ new users may wish to avoid. &foo; # foo() get current args, like foo(@_) !! foo; # like foo() IFF sub foo predeclared, else "foo" -Not only does the "C<&>" form make the argument list optional, but it also -disables any prototype checking on the arguments you do provide. This +Not only does the C<&> form make the argument list optional, it also +disables any prototype checking on arguments you do provide. This is partly for historical reasons, and partly for having a convenient way -to cheat if you know what you're doing. See the section on Prototypes below. +to cheat if you know what you're doing. See L<Prototypes> below. -Function whose names are in all upper case are reserved to the Perl core, -just as are modules whose names are in all lower case. A function in -all capitals is a loosely-held convention meaning it will be called -indirectly by the run-time system itself.
Functions that do special, -pre-defined things are C<BEGIN>, C<END>, C<AUTOLOAD>, and C<DESTROY>--plus all the -functions mentioned in L<perltie>. The 5.005 release adds C<INIT> -to this list. +Functions whose names are in all upper case are reserved to the Perl +core, as are modules whose names are in all lower case. A +function in all capitals is a loosely-held convention meaning it +will be called indirectly by the run-time system itself, usually +due to a triggered event. Functions that do special, pre-defined +things include C<BEGIN>, C<CHECK>, C<INIT>, C<END>, C<AUTOLOAD>, and +C<DESTROY>--plus all functions mentioned in L<perltie>. =head2 Private Variables via my() @@ -207,37 +218,44 @@ Synopsis: my (@wid, %get); # declare list of variables local my $foo = "flurp"; # declare $foo lexical, and init it my @oof = @bar; # declare @oof lexical, and init it - -A "C<my>" declares the listed variables to be confined (lexically) to the -enclosing block, conditional (C<if/unless/elsif/else>), loop -(C<for/foreach/while/until/continue>), subroutine, C<eval>, or -C<do/require/use>'d file. If more than one value is listed, the list -must be placed in parentheses. All listed elements must be legal lvalues. -Only alphanumeric identifiers may be lexically scoped--magical -builtins like C<$/> must currently be C<local>ize with "C<local>" instead. - -Unlike dynamic variables created by the "C<local>" operator, lexical -variables declared with "C<my>" are totally hidden from the outside world, -including any called subroutines (even if it's the same subroutine called -from itself or elsewhere--every call gets its own copy). - -This doesn't mean that a C<my()> variable declared in a statically -I<enclosing> lexical scope would be invisible. Only the dynamic scopes -are cut off. For example, the C<bumpx()> function below has access to the -lexical C<$x> variable because both the my and the sub occurred at the same -scope, presumably the file scope. 
+ my $x : Foo = $y; # similar, with an attribute applied + +B<WARNING>: The use of attribute lists on C<my> declarations is +experimental. This feature should not be relied upon. It may +change or disappear in future releases of Perl. See L<attributes>. + +The C<my> operator declares the listed variables to be lexically +confined to the enclosing block, conditional (C<if/unless/elsif/else>), +loop (C<for/foreach/while/until/continue>), subroutine, C<eval>, +or C<do/require/use>'d file. If more than one value is listed, the +list must be placed in parentheses. All listed elements must be +legal lvalues. Only alphanumeric identifiers may be lexically +scoped--magical built-ins like C<$/> must currently be C<local>ized +with C<local> instead. + +Unlike dynamic variables created by the C<local> operator, lexical +variables declared with C<my> are totally hidden from the outside +world, including any called subroutines. This is true even if it's the +same subroutine called from itself or elsewhere--every call gets +its own copy. + +This doesn't mean that a C<my> variable declared in a statically +enclosing lexical scope would be invisible. Only dynamic scopes +are cut off. For example, the C<bumpx()> function below has access +to the lexical $x variable because both the C<my> and the C<sub> +occurred at the same scope, presumably file scope. my $x = 10; sub bumpx { $x++ } -(An C<eval()>, however, can see the lexical variables of the scope it is -being evaluated in so long as the names aren't hidden by declarations within -the C<eval()> itself. See L<perlref>.) +An C<eval()>, however, can see lexical variables of the scope it is +being evaluated in, so long as the names aren't hidden by declarations within +the C<eval()> itself. See L<perlref>. -The parameter list to C<my()> may be assigned to if desired, which allows you +The parameter list to my() may be assigned to if desired, which allows you to initialize your variables.
(If no initializer is given for a particular variable, it is created with the undefined value.) Commonly -this is used to name the parameters to a subroutine. Examples: +this is used to name input parameters to a subroutine. Examples: $arg = "fred"; # "global" variable $n = cube_root(27); @@ -250,8 +268,8 @@ this is used to name the parameters to a subroutine. Examples: return $arg; } -The "C<my>" is simply a modifier on something you might assign to. So when -you do assign to the variables in its argument list, the "C<my>" doesn't +The C<my> is simply a modifier on something you might assign to. So when +you do assign to variables in its argument list, C<my> doesn't change whether those variables are viewed as a scalar or an array. So my ($foo) = <STDIN>; # WRONG? @@ -275,24 +293,24 @@ the current statement. Thus, my $x = $x; -can be used to initialize the new $x with the value of the old C<$x>, and +can be used to initialize a new $x with the value of the old $x, and the expression my $x = 123 and $x == 123 -is false unless the old C<$x> happened to have the value C<123>. +is false unless the old $x happened to have the value C<123>. Lexical scopes of control structures are not bounded precisely by the braces that delimit their controlled blocks; control expressions are -part of the scope, too. Thus in the loop +part of that scope, too. Thus in the loop - while (defined(my $line = <>)) { + while (my $line = <>) { $line = lc $line; } continue { print $line; } -the scope of C<$line> extends from its declaration throughout the rest of +the scope of $line extends from its declaration throughout the rest of the loop construct (including the C<continue> clause), but not beyond it. Similarly, in the conditional @@ -305,44 +323,48 @@ it. 
Similarly, in the conditional die "'$answer' is neither 'yes' nor 'no'"; } -the scope of C<$answer> extends from its declaration throughout the rest -of the conditional (including C<elsif> and C<else> clauses, if any), +the scope of $answer extends from its declaration through the rest +of that conditional, including any C<elsif> and C<else> clauses, but not beyond it. -(None of the foregoing applies to C<if/unless> or C<while/until> +None of the foregoing text applies to C<if/unless> or C<while/until> modifiers appended to simple statements. Such modifiers are not -control structures and have no effect on scoping.) +control structures and have no effect on scoping. The C<foreach> loop defaults to scoping its index variable dynamically -(in the manner of C<local>; see below). However, if the index -variable is prefixed with the keyword "C<my>", then it is lexically -scoped instead. Thus in the loop +in the manner of C<local>. However, if the index variable is +prefixed with the keyword C<my>, or if there is already a lexical +by that name in scope, then a new lexical is created instead. Thus +in the loop for my $i (1, 2, 3) { some_function(); } -the scope of C<$i> extends to the end of the loop, but not beyond it, and -so the value of C<$i> is unavailable in C<some_function()>. +the scope of $i extends to the end of the loop, but not beyond it, +rendering the value of $i inaccessible within C<some_function()>. Some users may wish to encourage the use of lexically scoped variables. -As an aid to catching implicit references to package variables, -if you say +As an aid to catching implicit uses of package variables, +which are always global, if you say use strict 'vars'; -then any variable reference from there to the end of the enclosing -block must either refer to a lexical variable, or must be fully -qualified with the package name. A compilation error results -otherwise. An inner block may countermand this with S<"C<no strict 'vars'>">.
- -A C<my()> has both a compile-time and a run-time effect. At compile time, -the compiler takes notice of it; the principle usefulness of this is to -quiet S<"C<use strict 'vars'>">. The actual initialization is delayed until -run time, so it gets executed appropriately; every time through a loop, -for example. - -Variables declared with "C<my>" are not part of any package and are therefore +then any variable mentioned from there to the end of the enclosing +block must either refer to a lexical variable, be predeclared via +C<our> or C<use vars>, or else must be fully qualified with the package name. +A compilation error results otherwise. An inner block may countermand +this with C<no strict 'vars'>. + +A C<my> has both a compile-time and a run-time effect. At compile +time, the compiler takes notice of it. The principal usefulness +of this is to quiet C<use strict 'vars'>, but it is also essential +for generation of closures as detailed in L<perlref>. Actual +initialization is delayed until run time, though, so it gets executed +at the appropriate time, such as each time through a loop, for +example. + +Variables declared with C<my> are not part of any package and are therefore never fully qualified with the package name. In particular, you're not allowed to try to make a package variable (or other global) lexical: @@ -360,13 +382,14 @@ lexical of the same name is also visible: That will print out C<20> and C<10>. -You may declare "C<my>" variables at the outermost scope of a file to hide -any such identifiers totally from the outside world. This is similar -to C's static variables at the file level. To do this with a subroutine -requires the use of a closure (anonymous function with lexical access).
-If a block (such as an C<eval()>, function, or C<package>) wants to create -a private subroutine that cannot be called from outside that block, -it can declare a lexical variable containing an anonymous sub reference: +You may declare C<my> variables at the outermost scope of a file +to hide any such identifiers from the world outside that file. This +is similar in spirit to C's static variables when they are used at +the file level. To do this with a subroutine requires the use of +a closure (an anonymous function that accesses enclosing lexicals). +If you want to create a private subroutine that cannot be called +from outside the enclosing block, you can declare a lexical variable containing +an anonymous sub reference: my $secret_version = '1.001-beta'; my $secret_sub = sub { print $secret_version }; @@ -375,11 +398,13 @@ it can declare a lexical variable containing an anonymous sub reference: As long as the reference is never returned by any function within the module, no outside module can see the subroutine, because its name is not in any package's symbol table. Remember that it's not I<REALLY> called -C<$some_pack::secret_version> or anything; it's just C<$secret_version>, +C<$some_pack::secret_version> or anything; it's just $secret_version, unqualified and unqualifiable. -This does not work with object methods, however; all object methods have -to be in the symbol table of some package to be found. +This does not work with object methods, however; all object methods +have to be in the symbol table of some package to be found. See +L<perlref/"Function Templates"> for something of a work-around to +this. =head2 Persistent Private Variables @@ -415,7 +440,7 @@ and put the static variable outside the function but in the block. If this function is being sourced in from a separate file via C<require> or C<use>, then this is probably just fine.
If it's -all in the main program, you'll need to arrange for the C<my()> +all in the main program, you'll need to arrange for the C<my> to be executed early, either by putting the whole block above your main program, or more likely, placing merely a C<BEGIN> sub around it to make sure it gets executed before your program @@ -428,20 +453,21 @@ starts to run: } } -See L<perlmod/"Package Constructors and Destructors"> about the C<BEGIN> function. +See L<perlmod/"Package Constructors and Destructors"> about the +special triggered functions, C<BEGIN>, C<CHECK>, C<INIT> and C<END>. -If declared at the outermost scope, the file scope, then lexicals work -someone like C's file statics. They are available to all functions in -that same file declared below them, but are inaccessible from outside of -the file. This is sometimes used in modules to create private variables -for the whole module. +If declared at the outermost scope (the file scope), then lexicals +work somewhat like C's file statics. They are available to all +functions in that same file declared below them, but are inaccessible +from outside that file. This strategy is sometimes used in modules +to create private variables that the whole module can see. =head2 Temporary Values via local() -B<NOTE>: In general, you should be using "C<my>" instead of "C<local>", because +B<WARNING>: In general, you should be using C<my> instead of C<local>, because it's faster and safer. Exceptions to this include the global punctuation variables, filehandles and formats, and direct manipulation of the Perl -symbol table itself. Format variables often use "C<local>" though, as do +symbol table itself. Format variables often use C<local> though, as do other variables whose current value must be visible to called subroutines. 
@@ -458,14 +484,14 @@ Synopsis: local *merlyn = 'randal'; # SAME THING: promote 'randal' to *randal local *merlyn = \$randal; # just alias $merlyn, not @merlyn etc -A C<local()> modifies its listed variables to be "local" to the enclosing -block, C<eval>, or C<do FILE>--and to I<any subroutine called from within that block>. -A C<local()> just gives temporary values to global (meaning package) -variables. It does B<not> create a local variable. This is known as -dynamic scoping. Lexical scoping is done with "C<my>", which works more -like C's auto declarations. +A C<local> modifies its listed variables to be "local" to the +enclosing block, C<eval>, or C<do FILE>--and to I<any subroutine +called from within that block>. A C<local> just gives temporary +values to global (meaning package) variables. It does I<not> create +a local variable. This is known as dynamic scoping. Lexical scoping +is done with C<my>, which works more like C's auto declarations. -If more than one variable is given to C<local()>, they must be placed in +If more than one variable is given to C<local>, they must be placed in parentheses. All listed elements must be legal lvalues. This operator works by saving the current values of those variables in its argument list on a hidden stack and restoring them upon exiting the block, subroutine, or @@ -490,7 +516,7 @@ subroutine. Examples: } # old %digits restored here -Because C<local()> is a run-time command, it gets executed every time +Because C<local> is a run-time operator, it gets executed each time through a loop. In releases of Perl previous to 5.0, this used more stack storage each time until the loop was exited. Perl now reclaims the space each time through, but it's still more efficient to declare your variables @@ -581,34 +607,54 @@ Perl will print This is a test only a test. 
The array has 6 elements: 0, 1, 2, undef, undef, 5 -Note also that when you C<local>ize a member of a composite type that -B<does not exist previously>, the value is treated as though it were -in an lvalue context, i.e., it is first created and then C<local>ized. -The consequence of this is that the hash or array is in fact permanently -modified. For instance, if you say +The behavior of local() on non-existent members of composite +types is subject to change in future. + +=head2 Lvalue subroutines - %hash = ( 'This' => 'is', 'a' => 'test' ); - @ary = ( 0..5 ); - { - local($ary[8]) = 0; - local($hash{'b'}) = 'whatever'; +B<WARNING>: Lvalue subroutines are still experimental and the implementation +may change in future versions of Perl. + +It is possible to return a modifiable value from a subroutine. +To do this, you have to declare the subroutine to return an lvalue. + + my $val; + sub canmod : lvalue { + $val; + } + sub nomod { + $val; } - printf "%%hash has now %d keys, \@ary %d elements.\n", - scalar(keys(%hash)), scalar(@ary); -Perl will print + canmod() = 5; # assigns to $val + nomod() = 5; # ERROR - %hash has now 3 keys, @ary 9 elements. +The scalar/list context for the subroutine and for the right-hand +side of assignment is determined as if the subroutine call is replaced +by a scalar. For example, consider: -The above behavior of local() on non-existent members of composite -types is subject to change in future. + data(2,3) = get_data(3,4); + +Both subroutines here are called in a scalar context, while in: + + (data(2,3)) = get_data(3,4); + +and in: + + (data(2),data(3)) = get_data(3,4); + +all the subroutines are called in a list context. + +The current implementation does not allow arrays and hashes to be +returned from lvalue subroutines directly. You may return a +reference instead. This restriction may be lifted in future. 
=head2 Passing Symbol Table Entries (typeglobs) -[Note: The mechanism described in this section was originally the only -way to simulate pass-by-reference in older versions of Perl. While it -still works fine in modern versions, the new reference mechanism is -generally easier to work with. See below.] +B<WARNING>: The mechanism described in this section was originally +the only way to simulate pass-by-reference in older versions of +Perl. While it still works fine in modern versions, the new reference +mechanism is generally easier to work with. See below. Sometimes you don't want to pass the value of an array to a subroutine but rather the name of it, so that the subroutine can modify the global @@ -621,7 +667,7 @@ funny prefix characters on variables and subroutines and such. When evaluated, the typeglob produces a scalar value that represents all the objects of that name, including any filehandle, format, or subroutine. When assigned to, it causes the name mentioned to refer to -whatever "C<*>" value was assigned to it. Example: +whatever C<*> value was assigned to it. Example: sub doubleary { local(*someary) = @_; @@ -632,7 +678,7 @@ whatever "C<*>" value was assigned to it. Example: doubleary(*foo); doubleary(*bar); -Note that scalars are already passed by reference, so you can modify +Scalars are already passed by reference, so you can modify scalar arguments without using this mechanism by referring explicitly to C<$_[0]> etc. You can modify all the elements of an array by passing all the elements as scalars, but you have to use the C<*> mechanism (or @@ -647,13 +693,13 @@ L<perldata/"Typeglobs and Filehandles">. =head2 When to Still Use local() -Despite the existence of C<my()>, there are still three places where the -C<local()> operator still shines. In fact, in these three places, you +Despite the existence of C<my>, there are still three places where the +C<local> operator still shines. 
In fact, in these three places, you I<must> use C<local> instead of C<my>. =over -=item 1. You need to give a global variable a temporary value, especially C<$_>. +=item 1. You need to give a global variable a temporary value, especially $_. The global variables, like C<@ARGV> or the punctuation variables, must be C<local>ized with C<local()>. This block reads in F</etc/motd>, and splits @@ -667,13 +713,13 @@ in C<@Fields>. @Fields = split /^\s*=+\s*$/; } -It particular, it's important to C<local>ize C<$_> in any routine that assigns +It particular, it's important to C<local>ize $_ in any routine that assigns to it. Look out for implicit assignments in C<while> conditionals. =item 2. You need to create a local file or directory handle or a local function. -A function that needs a filehandle of its own must use C<local()> uses -C<local()> on complete typeglob. This can be used to create new symbol +A function that needs a filehandle of its own must use +C<local()> on a complete typeglob. This can be used to create new symbol table entries: sub ioqueue { @@ -724,9 +770,9 @@ you're going to have to use an explicit pass-by-reference. Before you do that, you need to understand references as detailed in L<perlref>. This section may not make much sense to you otherwise. -Here are a few simple examples. First, let's pass in several -arrays to a function and have it C<pop> all of then, return a new -list of all their former last elements: +Here are a few simple examples. First, let's pass in several arrays +to a function and have it C<pop> all of then, returning a new list +of all their former last elements: @tailings = popmany ( \@a, \@b, \@c, \@d ); @@ -765,9 +811,10 @@ Where people get into trouble is here: or (%a, %b) = func(%c, %d); -That syntax simply won't work. It sets just C<@a> or C<%a> and clears the C<@b> or -C<%b>. Plus the function didn't get passed into two separate arrays or -hashes: it got one long list in C<@_>, as always. +That syntax simply won't work. 
It sets just C<@a> or C<%a> and +clears the C<@b> or C<%b>. Plus the function didn't get passed +into two separate arrays or hashes: it got one long list in C<@_>, +as always. If you can arrange for everyone to deal with this through references, it's cleaner code, although not so nice to look at. Here's a function that @@ -799,12 +846,13 @@ It turns out that you can actually do this also: } Here we're using the typeglobs to do symbol table aliasing. It's -a tad subtle, though, and also won't work if you're using C<my()> -variables, because only globals (well, and C<local()>s) are in the symbol table. +a tad subtle, though, and also won't work if you're using C<my> +variables, because only globals (even in disguise as C<local>s) +are in the symbol table. If you're passing around filehandles, you could usually just use the bare -typeglob, like C<*STDOUT>, but typeglobs references would be better because -they'll still work properly under S<C<use strict 'refs'>>. For example: +typeglob, like C<*STDOUT>, but typeglobs references work, too. +For example: splutter(\*STDOUT); sub splutter { @@ -818,45 +866,41 @@ they'll still work properly under S<C<use strict 'refs'>>. For example: return scalar <$fh>; } -Another way to do this is using C<*HANDLE{IO}>, see L<perlref> for usage -and caveats. - -If you're planning on generating new filehandles, you could do this: +If you're planning on generating new filehandles, you could do this. +Notice to pass back just the bare *FH, not its reference. sub openit { - my $name = shift; + my $path = shift; local *FH; return open (FH, $path) ? *FH : undef; } -Although that will actually produce a small memory leak. See the bottom -of L<perlfunc/open()> for a somewhat cleaner way using the C<IO::Handle> -package. - =head2 Prototypes -As of the 5.002 release of perl, if you declare +Perl supports a very limited kind of compile-time argument checking +using function prototyping. 
If you declare sub mypush (\@@) -then C<mypush()> takes arguments exactly like C<push()> does. The declaration -of the function to be called must be visible at compile time. The prototype -affects only the interpretation of new-style calls to the function, where -new-style is defined as not using the C<&> character. In other words, -if you call it like a builtin function, then it behaves like a builtin -function. If you call it like an old-fashioned subroutine, then it -behaves like an old-fashioned subroutine. It naturally falls out from -this rule that prototypes have no influence on subroutine references -like C<\&foo> or on indirect subroutine calls like C<&{$subref}> or -C<$subref-E<gt>()>. +then C<mypush()> takes arguments exactly like C<push()> does. The +function declaration must be visible at compile time. The prototype +affects only interpretation of new-style calls to the function, +where new-style is defined as not using the C<&> character. In +other words, if you call it like a built-in function, then it behaves +like a built-in function. If you call it like an old-fashioned +subroutine, then it behaves like an old-fashioned subroutine. It +naturally falls out from this rule that prototypes have no influence +on subroutine references like C<\&foo> or on indirect subroutine +calls like C<&{$subref}> or C<< $subref->() >>. Method calls are not influenced by prototypes either, because the -function to be called is indeterminate at compile time, because it depends -on inheritance. +function to be called is indeterminate at compile time, since +the exact code called depends on inheritance. -Because the intent is primarily to let you define subroutines that work -like builtin commands, here are the prototypes for some other functions -that parse almost exactly like the corresponding builtins. 
+Because the intent of this feature is primarily to let you define +subroutines that work like built-in functions, here are prototypes +for some other functions that parse almost exactly like the +corresponding built-in. Declared as Called as @@ -877,35 +921,45 @@ that parse almost exactly like the corresponding builtins. Any backslashed prototype character represents an actual argument that absolutely must start with that character. The value passed -to the subroutine (as part of C<@_>) will be a reference to the -actual argument given in the subroutine call, obtained by applying -C<\> to that argument. +as part of C<@_> will be a reference to the actual argument given +in the subroutine call, obtained by applying C<\> to that argument. Unbackslashed prototype characters have special meanings. Any -unbackslashed C<@> or C<%> eats all the rest of the arguments, and forces +unbackslashed C<@> or C<%> eats all remaining arguments, and forces list context. An argument represented by C<$> forces scalar context. An C<&> requires an anonymous subroutine, which, if passed as the first -argument, does not require the "C<sub>" keyword or a subsequent comma. A -C<*> allows the subroutine to accept a bareword, constant, scalar expression, +argument, does not require the C<sub> keyword or a subsequent comma. + +A C<*> allows the subroutine to accept a bareword, constant, scalar expression, typeglob, or a reference to a typeglob in that slot. The value will be available to the subroutine either as a simple scalar, or (in the latter -two cases) as a reference to the typeglob. +two cases) as a reference to the typeglob. If you wish to always convert +such arguments to a typeglob reference, use Symbol::qualify_to_ref() as +follows: + + use Symbol 'qualify_to_ref'; + + sub foo (*) { + my $fh = qualify_to_ref(shift, caller); + ... + } A semicolon separates mandatory arguments from optional arguments. -(It is redundant before C<@> or C<%>.) 
+It is redundant before C<@> or C<%>, which gobble up everything else. -Note how the last three examples above are treated specially by the parser. -C<mygrep()> is parsed as a true list operator, C<myrand()> is parsed as a -true unary operator with unary precedence the same as C<rand()>, and -C<mytime()> is truly without arguments, just like C<time()>. That is, if you -say +Note how the last three examples in the table above are treated +specially by the parser. C<mygrep()> is parsed as a true list +operator, C<myrand()> is parsed as a true unary operator with unary +precedence the same as C<rand()>, and C<mytime()> is truly without +arguments, just like C<time()>. That is, if you say mytime +2; you'll get C<mytime() + 2>, not C<mytime(2)>, which is how it would be parsed -without the prototype. +without a prototype. -The interesting thing about C<&> is that you can generate new syntax with it: +The interesting thing about C<&> is that you can generate new syntax with it, +provided it's in the initial position: sub try (&@) { my($try,$catch) = @_; @@ -924,12 +978,12 @@ The interesting thing about C<&> is that you can generate new syntax with it: }; That prints C<"unphooey">. (Yes, there are still unresolved -issues having to do with the visibility of C<@_>. I'm ignoring that +issues having to do with visibility of C<@_>. I'm ignoring that question for the moment. (But note that if we make C<@_> lexically scoped, those anonymous subroutines can act like closures... (Gee, is this sounding a little Lispish? (Never mind.)))) -And here's a reimplementation of C<grep>: +And here's a reimplementation of the Perl C<grep> operator: sub mygrep (&@) { my $code = shift; @@ -965,12 +1019,12 @@ returning a list: func(@foo); func( split /:/ ); -Then you've just supplied an automatic C<scalar()> in front of their +Then you've just supplied an automatic C<scalar> in front of their argument, which can be more than a bit surprising. 
The old C<@foo> which used to hold one thing doesn't get passed in. Instead, -the C<func()> now gets passed in C<1>, that is, the number of elements -in C<@foo>. And the C<split()> gets called in a scalar context and -starts scribbling on your C<@_> parameter list. +C<func()> now gets passed in a C<1>; that is, the number of elements +in C<@foo>. And the C<split> gets called in scalar context so it +starts scribbling on your C<@_> parameter list. Ouch! This is all very powerful, of course, and should be used only in moderation to make the world a better place. @@ -978,12 +1032,11 @@ to make the world a better place. =head2 Constant Functions Functions with a prototype of C<()> are potential candidates for -inlining. If the result after optimization and constant folding is -either a constant or a lexically-scoped scalar which has no other +inlining. If the result after optimization and constant folding +is either a constant or a lexically-scoped scalar which has no other references, then it will be used in place of function calls made -without C<&> or C<do>. Calls made using C<&> or C<do> are never -inlined. (See F<constant.pm> for an easy way to declare most -constants.) +without C<&>. Calls made using C<&> are never inlined. (See +F<constant.pm> for an easy way to declare most constants.) The following functions would all be inlined: @@ -1019,55 +1072,57 @@ a mandatory warning. (You can use this warning to tell whether or not a particular subroutine is considered constant.) The warning is considered severe enough not to be optional because previously compiled invocations of the function will still be using the old value of the -function. If you need to be able to redefine the subroutine you need to +function. 
If you need to be able to redefine the subroutine, you need to ensure that it isn't inlined, either by dropping the C<()> prototype -(which changes the calling semantics, so beware) or by thwarting the +(which changes calling semantics, so beware) or by thwarting the inlining mechanism in some other way, such as sub not_inlined () { 23 if $]; } -=head2 Overriding Builtin Functions +=head2 Overriding Built-in Functions -Many builtin functions may be overridden, though this should be tried +Many built-in functions may be overridden, though this should be tried only occasionally and for good reason. Typically this might be -done by a package attempting to emulate missing builtin functionality +done by a package attempting to emulate missing built-in functionality on a non-Unix system. Overriding may be done only by importing the name from a module--ordinary predeclaration isn't good enough. However, the -C<subs> pragma (compiler directive) lets you, in effect, predeclare subs -via the import syntax, and these names may then override the builtin ones: +C<use subs> pragma lets you, in effect, predeclare subs +via the import syntax, and these names may then override built-in ones: use subs 'chdir', 'chroot', 'chmod', 'chown'; chdir $somewhere; sub chdir { ... } -To unambiguously refer to the builtin form, one may precede the -builtin name with the special package qualifier C<CORE::>. For example, -saying C<CORE::open()> will always refer to the builtin C<open()>, even +To unambiguously refer to the built-in form, precede the +built-in name with the special package qualifier C<CORE::>. For example, +saying C<CORE::open()> always refers to the built-in C<open()>, even if the current package has imported some other subroutine called -C<&open()> from elsewhere. +C<&open()> from elsewhere. Even though it looks like a regular +function call, it isn't: you can't take a reference to it, such as +the incorrect C<\&CORE::open> might appear to produce. 
-Library modules should not in general export builtin names like "C<open>" -or "C<chdir>" as part of their default C<@EXPORT> list, because these may +Library modules should not in general export built-in names like C<open> +or C<chdir> as part of their default C<@EXPORT> list, because these may sneak into someone else's namespace and change the semantics unexpectedly. -Instead, if the module adds the name to the C<@EXPORT_OK> list, then it's +Instead, if the module adds that name to C<@EXPORT_OK>, then it's possible for a user to import the name explicitly, but not implicitly. That is, they could say use Module 'open'; -and it would import the C<open> override, but if they said +and it would import the C<open> override. But if they said use Module; -they would get the default imports without the overrides. +they would get the default imports without overrides. -The foregoing mechanism for overriding builtins is restricted, quite +The foregoing mechanism for overriding built-in is restricted, quite deliberately, to the package that requests the import. There is a second -method that is sometimes applicable when you wish to override a builtin +method that is sometimes applicable when you wish to override a built-in everywhere, without regard to namespace boundaries. This is achieved by importing a sub into the special namespace C<CORE::GLOBAL::>. Here is an example that quite brazenly replaces the C<glob> operator with something @@ -1089,9 +1144,12 @@ that understands regular expressions. sub glob { my $pat = shift; my @got; - local(*D); - if (opendir D, '.') { @got = grep /$pat/, readdir D; closedir D; } - @got; + local *D; + if (opendir D, '.') { + @got = grep /$pat/, readdir D; + closedir D; + } + return @got; } 1; @@ -1102,44 +1160,45 @@ And here's how it could be (ab)used: use REGlob 'glob'; # override glob() in Foo:: only print for <^[a-z_]+\.pm\$>; # show all pragmatic modules -Note that the initial comment shows a contrived, even dangerous example. 
+The initial comment shows a contrived, even dangerous example. By overriding C<glob> globally, you would be forcing the new (and -subversive) behavior for the C<glob> operator for B<every> namespace, +subversive) behavior for the C<glob> operator for I<every> namespace, without the complete cognizance or cooperation of the modules that own those namespaces. Naturally, this should be done with extreme caution--if it must be done at all. The C<REGlob> example above does not implement all the support needed to -cleanly override perl's C<glob> operator. The builtin C<glob> has +cleanly override perl's C<glob> operator. The built-in C<glob> has different behaviors depending on whether it appears in a scalar or list -context, but our C<REGlob> doesn't. Indeed, many perl builtins have such +context, but our C<REGlob> doesn't. Indeed, many perl built-in have such context sensitive behaviors, and these must be adequately supported by a properly written override. For a fully functional example of overriding C<glob>, study the implementation of C<File::DosGlob> in the standard library. - =head2 Autoloading -If you call a subroutine that is undefined, you would ordinarily get an -immediate fatal error complaining that the subroutine doesn't exist. -(Likewise for subroutines being used as methods, when the method -doesn't exist in any base class of the class package.) If, -however, there is an C<AUTOLOAD> subroutine defined in the package or -packages that were searched for the original subroutine, then that -C<AUTOLOAD> subroutine is called with the arguments that would have been -passed to the original subroutine. The fully qualified name of the -original subroutine magically appears in the C<$AUTOLOAD> variable in the -same package as the C<AUTOLOAD> routine. The name is not passed as an -ordinary argument because, er, well, just because, that's why... 
- -Most C<AUTOLOAD> routines will load in a definition for the subroutine in -question using eval, and then execute that subroutine using a special -form of "goto" that erases the stack frame of the C<AUTOLOAD> routine -without a trace. (See the standard C<AutoLoader> module, for example.) -But an C<AUTOLOAD> routine can also just emulate the routine and never -define it. For example, let's pretend that a function that wasn't defined -should just call C<system()> with those arguments. All you'd do is this: +If you call a subroutine that is undefined, you would ordinarily +get an immediate, fatal error complaining that the subroutine doesn't +exist. (Likewise for subroutines being used as methods, when the +method doesn't exist in any base class of the class's package.) +However, if an C<AUTOLOAD> subroutine is defined in the package or +packages used to locate the original subroutine, then that +C<AUTOLOAD> subroutine is called with the arguments that would have +been passed to the original subroutine. The fully qualified name +of the original subroutine magically appears in the global $AUTOLOAD +variable of the same package as the C<AUTOLOAD> routine. The name +is not passed as an ordinary argument because, er, well, just +because, that's why... + +Many C<AUTOLOAD> routines load in a definition for the requested +subroutine using eval(), then execute that subroutine using a special +form of goto() that erases the stack frame of the C<AUTOLOAD> routine +without a trace. (See the source to the standard module documented +in L<AutoLoader>, for example.) But an C<AUTOLOAD> routine can +also just emulate the routine and never define it. For example, +let's pretend that a function that wasn't defined should just invoke +C<system> with those arguments. All you'd do is: sub AUTOLOAD { my $program = $AUTOLOAD; @@ -1150,8 +1209,8 @@ should just call C<system()> with those arguments. 
All you'd do is this: who('am', 'i'); ls('-l'); -In fact, if you predeclare the functions you want to call that way, you don't -even need the parentheses: +In fact, if you predeclare functions you want to call that way, you don't +even need parentheses: use subs qw(date who ls); date; @@ -1159,16 +1218,59 @@ even need the parentheses: ls -l; A more complete example of this is the standard Shell module, which -can treat undefined subroutine calls as calls to Unix programs. +can treat undefined subroutine calls as calls to external programs. -Mechanisms are available for modules writers to help split the modules -up into autoloadable files. See the standard AutoLoader module +Mechanisms are available to help modules writers split their modules +into autoloadable files. See the standard AutoLoader module described in L<AutoLoader> and in L<AutoSplit>, the standard SelfLoader modules in L<SelfLoader>, and the document on adding C -functions to perl code in L<perlxs>. +functions to Perl code in L<perlxs>. + +=head2 Subroutine Attributes + +A subroutine declaration or definition may have a list of attributes +associated with it. If such an attribute list is present, it is +broken up at space or colon boundaries and treated as though a +C<use attributes> had been seen. See L<attributes> for details +about what attributes are currently supported. +Unlike the limitation with the obsolescent C<use attrs>, the +C<sub : ATTRLIST> syntax works to associate the attributes with +a pre-declaration, and not just with a subroutine definition. + +The attributes must be valid as simple identifier names (without any +punctuation other than the '_' character). They may have a parameter +list appended, which is only checked for whether its parentheses ('(',')') +nest properly. + +Examples of valid syntax (even though the attributes are unknown): + + sub fnord (&\%) : switch(10,foo(7,3)) : expensive ; + sub plugh () : Ugly('\(") :Bad ; + sub xyzzy : _5x5 { ... 
} +Examples of invalid syntax: + + sub fnord : switch(10,foo() ; # ()-string not balanced + sub snoid : Ugly('(') ; # ()-string not balanced + sub xyzzy : 5x5 ; # "5x5" not a valid identifier + sub plugh : Y2::north ; # "Y2::north" not a simple identifier + sub snurt : foo + bar ; # "+" not a colon or space + +The attribute list is passed as a list of constant strings to the code +which associates them with the subroutine. In particular, the second example +of valid syntax above currently looks like this in terms of how it's +parsed and invoked: + + use attributes __PACKAGE__, \&plugh, q[Ugly('\(")], 'Bad'; + +For further details on attribute lists and their manipulation, +see L<attributes>. =head1 SEE ALSO -See L<perlref> for more about references and closures. See L<perlxs> if -you'd like to learn about calling C subroutines from perl. See L<perlmod> -to learn about bundling up your functions in separate files. +See L<perlref/"Function Templates"> for more about references and closures. +See L<perlxs> if you'd like to learn about calling C subroutines from Perl. +See L<perlembed> if you'd like to learn about calling Perl subroutines from C. +See L<perlmod> to learn about bundling up your functions in separate files. +See L<perlmodlib> to learn what library modules come standard on your system. +See L<perltoot> to learn how to make object method calls. diff --git a/contrib/perl5/pod/perlsyn.pod b/contrib/perl5/pod/perlsyn.pod index a3bc5ab..724ba12 100644 --- a/contrib/perl5/pod/perlsyn.pod +++ b/contrib/perl5/pod/perlsyn.pod @@ -5,21 +5,14 @@ perlsyn - Perl syntax =head1 DESCRIPTION A Perl script consists of a sequence of declarations and statements. -The only things that need to be declared in Perl are report formats -and subroutines. See the sections below for more information on those -declarations. All uninitialized user-created objects are assumed to -start with a C<null> or C<0> value until they are defined by some explicit -operation such as assignment.
(Though you can get warnings about the -use of undefined values if you like.) The sequence of statements is -executed just once, unlike in B<sed> and B<awk> scripts, where the -sequence of statements is executed for each input line. While this means -that you must explicitly loop over the lines of your input file (or -files), it also means you have much more control over which files and -which lines you look at. (Actually, I'm lying--it is possible to do an -implicit loop with either the B<-n> or B<-p> switch. It's just not the -mandatory default like it is in B<sed> and B<awk>.) - -=head2 Declarations +The sequence of statements is executed just once, unlike in B<sed> +and B<awk> scripts, where the sequence of statements is executed +for each input line. While this means that you must explicitly +loop over the lines of your input file (or files), it also means +you have much more control over which files and which lines you look at. +(Actually, I'm lying--it is possible to do an implicit loop with +either the B<-n> or B<-p> switch. It's just not the mandatory +default like it is in B<sed> and B<awk>.) Perl is, for the most part, a free-form language. (The only exception to this is format declarations, for obvious reasons.) Text from a @@ -29,11 +22,27 @@ interpreted either as division or pattern matching, depending on the context, and C++ C<//> comments just look like a null regular expression, so don't do that. +=head2 Declarations + +The only things you need to declare in Perl are report formats +and subroutines--and even undefined subroutines can be handled +through AUTOLOAD. A variable holds the undefined value (C<undef>) +until it has been assigned a defined value, which is anything +other than C<undef>. When used as a number, C<undef> is treated +as C<0>; when used as a string, it is treated as the empty string, +C<"">; and when used as a reference that isn't being assigned +to, it is treated as an error.
If you enable warnings, you'll +be notified of an uninitialized value whenever you treat C<undef> +as a string or a number. Well, usually. Boolean ("don't-care") +contexts and operators such as C<++>, C<-->, C<+=>, C<-=>, and +C<.=> are always exempt from such warnings. + A declaration can be put anywhere a statement can, but has no effect on the execution of the primary sequence of statements--declarations all take effect at compile time. Typically all the declarations are put at the beginning or the end of the script. However, if you're using -lexically-scoped private variables created with C<my()>, you'll have to make sure +lexically-scoped private variables created with C<my()>, you'll +have to make sure your format or subroutine definition is within the same block scope as the my if you expect to be able to access those private variables. @@ -44,7 +53,7 @@ subroutine without defining it by saying C<sub name>, thus: sub myname; $me = myname $0 or die "can't get myname"; -Note that it functions as a list operator, not as a unary operator; so +Note that myname() functions as a list operator, not as a unary operator; so be careful to use C<or> instead of C<||> in this case. However, if you were to declare the subroutine as C<sub myname ($)>, then C<myname> would function as a unary operator, so either C<or> or @@ -86,7 +95,7 @@ presuming you're a speaker of English. The C<foreach> modifier is an iterator: For each value in EXPR, it aliases C<$_> to the value and executes the statement. The C<while> and C<until> modifiers have the usual "C<while> loop" semantics (conditional evaluated first), except -when applied to a C<do>-BLOCK (or to the now-deprecated C<do>-SUBROUTINE +when applied to a C<do>-BLOCK (or to the deprecated C<do>-SUBROUTINE statement), in which case the block executes once before the conditional is evaluated.
This is so that you can write loops like: @@ -134,6 +143,7 @@ The following compound statements may be used to control flow: LABEL while (EXPR) BLOCK continue BLOCK LABEL for (EXPR; EXPR; EXPR) BLOCK LABEL foreach VAR (LIST) BLOCK + LABEL foreach VAR (LIST) BLOCK continue BLOCK LABEL BLOCK continue BLOCK Note that, unlike C and Pascal, these are defined in terms of BLOCKs, @@ -154,13 +164,17 @@ C<if> an C<else> goes with. If you use C<unless> in place of C<if>, the sense of the test is reversed. The C<while> statement executes the block as long as the expression is -true (does not evaluate to the null string (C<"">) or C<0> or C<"0")>. The LABEL is -optional, and if present, consists of an identifier followed by a colon. -The LABEL identifies the loop for the loop control statements C<next>, -C<last>, and C<redo>. If the LABEL is omitted, the loop control statement +true (does not evaluate to the null string C<""> or C<0> or C<"0">). +The LABEL is optional, and if present, consists of an identifier followed +by a colon. The LABEL identifies the loop for the loop control +statements C<next>, C<last>, and C<redo>. +If the LABEL is omitted, the loop control statement refers to the innermost enclosing loop. This may include dynamically looking back your call-stack at run time to find the LABEL. Such -desperate behavior triggers a warning if you use the B<-w> flag. +desperate behavior triggers a warning if you use the C<use warnings> +pragma or the B<-w> flag. +Unlike a C<foreach> statement, a C<while> statement never implicitly +localises any variables. If there is a C<continue> BLOCK, it is always executed just before the conditional is about to be evaluated again, just like the third part of a @@ -289,9 +303,7 @@ is therefore visible only within the loop. Otherwise, the variable is implicitly local to the loop and regains its former value upon exiting the loop.
If the variable was previously declared with C<my>, it uses that variable instead of the global one, but it's still localized to -the loop. (Note that a lexically scoped variable can cause problems -if you have subroutine or format declarations within the loop which -refer to it.) +the loop. The C<foreach> keyword is actually a synonym for the C<for> keyword, so you can use C<foreach> for readability or C<for> for brevity. (Or because @@ -312,7 +324,7 @@ Examples: for (@ary) { s/foo/bar/ } - foreach my $elem (@elements) { + for my $elem (@elements) { $elem *= 2; } @@ -341,8 +353,8 @@ Here's how a C programmer might code up a particular algorithm in Perl: Whereas here's how a Perl programmer more comfortable with the idiom might do it: - OUTER: foreach my $wid (@ary1) { - INNER: foreach my $jet (@ary2) { + OUTER: for my $wid (@ary1) { + INNER: for my $jet (@ary2) { next OUTER if $wid > $jet; $wid += $jet; } @@ -490,15 +502,15 @@ C<HTTP_USER_AGENT> envariable. That kind of switch statement only works when you know the C<&&> clauses will be true. If you don't, the previous C<?:> example should be used. -You might also consider writing a hash instead of synthesizing a C<switch> -statement. +You might also consider writing a hash of subroutine references +instead of synthesizing a C<switch> statement. =head2 Goto -Although not for the faint of heart, Perl does support a C<goto> statement. -A loop's LABEL is not actually a valid target for a C<goto>; -it's just the name of the loop. There are three forms: C<goto>-LABEL, -C<goto>-EXPR, and C<goto>-&NAME. +Although not for the faint of heart, Perl does support a C<goto> +statement. There are three forms: C<goto>-LABEL, C<goto>-EXPR, and +C<goto>-&NAME. A loop's LABEL is not actually a valid target for +a C<goto>; it's just the name of the loop. The C<goto>-LABEL form finds the statement labeled with LABEL and resumes execution there. 
It may not be used to go into any construct that @@ -513,7 +525,7 @@ The C<goto>-EXPR form expects a label name, whose scope will be resolved dynamically. This allows for computed C<goto>s per FORTRAN, but isn't necessarily recommended if you're optimizing for maintainability: - goto ("FOO", "BAR", "GLARCH")[$i]; + goto(("FOO", "BAR", "GLARCH")[$i]); The C<goto>-&NAME form is highly magical, and substitutes a call to the named subroutine for the currently running subroutine. This is used by @@ -582,7 +594,7 @@ this, one can control Perl's idea of filenames and line numbers in error or warning messages (especially for strings that are processed with C<eval()>). The syntax for this mechanism is the same as for most C preprocessors: it matches the regular expression -C</^#\s*line\s+(\d+)\s*(?:\s"([^"]*)")?/> with C<$1> being the line +C</^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/> with C<$1> being the line number for the next line, and C<$2> being the optional filename (specified within quotes). diff --git a/contrib/perl5/pod/perlthrtut.pod b/contrib/perl5/pod/perlthrtut.pod index f2ca3bd..0f15d57 100644 --- a/contrib/perl5/pod/perlthrtut.pod +++ b/contrib/perl5/pod/perlthrtut.pod @@ -4,8 +4,16 @@ perlthrtut - tutorial on threads in Perl =head1 DESCRIPTION + WARNING: Threading is an experimental feature. Both the interface + and implementation are subject to change drastically. In fact, this + documentation describes the flavor of threads that was in version + 5.005. Perl 5.6.0 and later have the beginnings of support for + interpreter threads, which (when finished) is expected to be + significantly different from what is described here. The information + contained here may therefore soon be obsolete. Use at your own risk! + One of the most prominent new features of Perl 5.005 is the inclusion -of threads. Threads make a number of things a lot easier, and are a +of threads. 
Threads make a number of things a lot easier, and are a very useful addition to your bag of programming tricks. =head1 What Is A Thread Anyway? @@ -14,44 +22,44 @@ A thread is a flow of control through a program with a single execution point. Sounds an awful lot like a process, doesn't it? Well, it should. -Threads are one of the pieces of a process. Every process has at least +Threads are one of the pieces of a process. Every process has at least one thread and, up until now, every process running Perl had only one -thread. With 5.005, though, you can create extra threads. We're going +thread. With 5.005, though, you can create extra threads. We're going to show you how, when, and why. =head1 Threaded Program Models There are three basic ways that you can structure a threaded -program. Which model you choose depends on what you need your program -to do. For many non-trivial threaded programs you'll need to choose +program. Which model you choose depends on what you need your program +to do. For many non-trivial threaded programs you'll need to choose different models for different pieces of your program. =head2 Boss/Worker The boss/worker model usually has one `boss' thread and one or more -`worker' threads. The boss thread gathers or generates tasks that need +`worker' threads. The boss thread gathers or generates tasks that need to be done, then parcels those tasks out to the appropriate worker thread. This model is common in GUI and server programs, where a main thread waits for some event and then passes that event to the appropriate -worker threads for processing. Once the event has been passed on, the +worker threads for processing. Once the event has been passed on, the boss thread goes back to waiting for another event. -The boss thread does relatively little work. While tasks aren't +The boss thread does relatively little work. While tasks aren't necessarily performed faster than with any other method, it tends to have the best user-response times. 
=head2 Work Crew In the work crew model, several threads are created that do -essentially the same thing to different pieces of data. It closely +essentially the same thing to different pieces of data. It closely mirrors classical parallel processing and vector processors, where a large array of processors do the exact same thing to many pieces of data. This model is particularly useful if the system running the program -will distribute multiple threads across different processors. It can +will distribute multiple threads across different processors. It can also be useful in ray tracing or rendering engines, where the individual threads can pass on interim results to give the user visual feedback. @@ -60,29 +68,29 @@ feedback. The pipeline model divides up a task into a series of steps, and passes the results of one step on to the thread processing the -next. Each thread does one thing to each piece of data and passes the +next. Each thread does one thing to each piece of data and passes the results to the next thread in line. This model makes the most sense if you have multiple processors so two or more threads will be executing in parallel, though it can often -make sense in other contexts as well. It tends to keep the individual +make sense in other contexts as well. It tends to keep the individual tasks small and simple, as well as allowing some parts of the pipeline to block (on I/O or system calls, for example) while other parts keep -going. If you're running different parts of the pipeline on different +going. If you're running different parts of the pipeline on different processors you may also take advantage of the caches on each processor. This model is also handy for a form of recursive programming where, rather than having a subroutine call itself, it instead creates -another thread. Prime and Fibonacci generators both map well to this +another thread. Prime and Fibonacci generators both map well to this form of the pipeline model. 
(A version of a prime number generator is presented later on.) =head1 Native threads -There are several different ways to implement threads on a system. How +There are several different ways to implement threads on a system. How threads are implemented depends both on the vendor and, in some cases, -the version of the operating system. Often the first implementation +the version of the operating system. Often the first implementation will be relatively simple, but later versions of the OS will be more sophisticated. @@ -93,42 +101,42 @@ There are three basic categories of threads-user-mode threads, kernel threads, and multiprocessor kernel threads. User-mode threads are threads that live entirely within a program and -its libraries. In this model, the OS knows nothing about threads. As +its libraries. In this model, the OS knows nothing about threads. As far as it's concerned, your process is just a process. This is the easiest way to implement threads, and the way most OSes -start. The big disadvantage is that, since the OS knows nothing about -threads, if one thread blocks they all do. Typical blocking activities +start. The big disadvantage is that, since the OS knows nothing about +threads, if one thread blocks they all do. Typical blocking activities include most system calls, most I/O, and things like sleep(). -Kernel threads are the next step in thread evolution. The OS knows +Kernel threads are the next step in thread evolution. The OS knows about kernel threads, and makes allowances for them. The main difference between a kernel thread and a user-mode thread is -blocking. With kernel threads, things that block a single thread don't -block other threads. This is not the case with user-mode threads, +blocking. With kernel threads, things that block a single thread don't +block other threads. This is not the case with user-mode threads, where the kernel blocks at the process level and not the thread level. 
This is a big step forward, and can give a threaded program quite a performance boost over non-threaded programs. Threads that block performing I/O, for example, won't block threads that are doing other -things. Each process still has only one thread running at once, +things. Each process still has only one thread running at once, though, regardless of how many CPUs a system might have. Since kernel threading can interrupt a thread at any time, they will uncover some of the implicit locking assumptions you may make in your -program. For example, something as simple as C<$a = $a + 2> can behave -unpredictably with kernel threads if C<$a> is visible to other -threads, as another thread may have changed C<$a> between the time it +program. For example, something as simple as C<$a = $a + 2> can behave +unpredictably with kernel threads if $a is visible to other +threads, as another thread may have changed $a between the time it was fetched on the right hand side and the time the new value is stored. Multiprocessor Kernel Threads are the final step in thread -support. With multiprocessor kernel threads on a machine with multiple +support. With multiprocessor kernel threads on a machine with multiple CPUs, the OS may schedule two or more threads to run simultaneously on different CPUs. This can give a serious performance boost to your threaded program, -since more than one thread will be executing at the same time. As a +since more than one thread will be executing at the same time. As a tradeoff, though, any of those nagging synchronization issues that might not have shown with basic kernel threads will appear with a vengeance. @@ -138,14 +146,14 @@ different OSes (and different thread implementations for a particular OS) allocate CPU cycles to threads in different ways. Cooperative multitasking systems have running threads give up control -if one of two things happen. If a thread calls a yield function, it -gives up control. 
It also gives up control if the thread does -something that would cause it to block, such as perform I/O. In a +if one of two things happen. If a thread calls a yield function, it +gives up control. It also gives up control if the thread does +something that would cause it to block, such as perform I/O. In a cooperative multitasking implementation, one thread can starve all the others for CPU time if it so chooses. Preemptive multitasking systems interrupt threads at regular intervals -while the system decides which thread should run next. In a preemptive +while the system decides which thread should run next. In a preemptive multitasking system, one thread usually won't monopolize the CPU. On some systems, there can be cooperative and preemptive threads @@ -156,18 +164,18 @@ normal priorities behave preemptively.) =head1 What kind of threads are perl threads? If you have experience with other thread implementations, you might -find that things aren't quite what you expect. It's very important to +find that things aren't quite what you expect. It's very important to remember when dealing with Perl threads that Perl Threads Are Not X Threads, for all values of X. They aren't POSIX threads, or -DecThreads, or Java's Green threads, or Win32 threads. There are +DecThreads, or Java's Green threads, or Win32 threads. There are similarities, and the broad concepts are the same, but if you start looking for implementation details you're going to be either -disappointed or confused. Possibly both. +disappointed or confused. Possibly both. This is not to say that Perl threads are completely different from -everything that's ever come before--they're not. Perl's threading -model owes a lot to other thread models, especially POSIX. Just as -Perl is not C, though, Perl threads are not POSIX threads. So if you +everything that's ever come before--they're not. Perl's threading +model owes a lot to other thread models, especially POSIX. 
Just as +Perl is not C, though, Perl threads are not POSIX threads. So if you find yourself looking for mutexes, or thread priorities, it's time to step back a bit and think about what you want to do and how Perl can do it. @@ -175,28 +183,28 @@ do it. =head1 Threadsafe Modules The addition of threads has changed Perl's internals -substantially. There are implications for people who write -modules--especially modules with XS code or external libraries. While +substantially. There are implications for people who write +modules--especially modules with XS code or external libraries. While most modules won't encounter any problems, modules that aren't explicitly tagged as thread-safe should be tested before being used in production code. Not all modules that you might use are thread-safe, and you should always assume a module is unsafe unless the documentation says -otherwise. This includes modules that are distributed as part of the -core. Threads are a beta feature, and even some of the standard +otherwise. This includes modules that are distributed as part of the +core. Threads are a beta feature, and even some of the standard modules aren't thread-safe. If you're using a module that's not thread-safe for some reason, you can protect yourself by using semaphores and lots of programming -discipline to control access to the module. Semaphores are covered +discipline to control access to the module. Semaphores are covered later in the article. Perl Threads Are Different =head1 Thread Basics The core Thread module provides the basic functions you need to write -threaded programs. In the following sections we'll cover the basics, -showing you what you need to do to create a threaded program. After +threaded programs. In the following sections we'll cover the basics, +showing you what you need to do to create a threaded program. After that, we'll go over some of the features of the Thread module that make threaded programming easier. 
@@ -208,7 +216,7 @@ your programs are compiled. If your Perl wasn't compiled with thread support enabled, then any attempt to use threads will fail. Remember that the threading support in 5.005 is in beta release, and -should be treated as such. You should expect that it may not function +should be treated as such. You should expect that it may not function entirely properly, and the thread interface may well change some before it is a fully supported, production release. The beta version shouldn't be used for mission-critical projects. Having said that, @@ -237,13 +245,13 @@ have code like this: Since code that runs both with and without threads is usually pretty messy, it's best to isolate the thread-specific code in its own -module. In our example above, that's what MyMod_threaded is, and it's +module. In our example above, that's what MyMod_threaded is, and it's only imported if we're running on a threaded Perl. =head2 Creating Threads The Thread package provides the tools you need to create new -threads. Like any other module, you need to tell Perl you want to use +threads. Like any other module, you need to tell Perl you want to use it; use Thread imports all the pieces you need to create basic threads. @@ -258,11 +266,11 @@ The simplest, straightforward way to create a thread is with new(): } The new() method takes a reference to a subroutine and creates a new -thread, which starts executing in the referenced subroutine. Control +thread, which starts executing in the referenced subroutine. Control then passes both to the subroutine and the caller. If you need to, your program can pass parameters to the subroutine as -part of the thread startup. Just include the list of parameters as +part of the thread startup. 
Just include the list of parameters as part of the C<Thread::new> call, like this: use Thread; @@ -281,8 +289,8 @@ part of the C<Thread::new> call, like this: The subroutine runs like a normal Perl subroutine, and the call to new Thread returns whatever the subroutine returns. -The last example illustrates another feature of threads. You can spawn -off several threads using the same subroutine. Each thread executes +The last example illustrates another feature of threads. You can spawn +off several threads using the same subroutine. Each thread executes the same subroutine, but in a separate thread with a separate environment and potentially separate arguments. @@ -305,22 +313,22 @@ spin off a chunk of code like eval(), but into its own thread: You'll notice we did a use Thread qw(async) in that example. async is not exported by default, so if you want it, you'll either need to import it before you use it or fully qualify it as -Thread::async. You'll also note that there's a semicolon after the -closing brace. That's because async() treats the following block as an +Thread::async. You'll also note that there's a semicolon after the +closing brace. That's because async() treats the following block as an anonymous subroutine, so the semicolon is necessary. Like eval(), the code executes in the same context as it would if it -weren't spun off. Since both the code inside and after the async start -executing, you need to be careful with any shared resources. Locking +weren't spun off. Since both the code inside and after the async start +executing, you need to be careful with any shared resources. Locking and other synchronization techniques are covered later. =head2 Giving up control There are times when you may find it useful to have a thread -explicitly give up the CPU to another thread. Your threading package +explicitly give up the CPU to another thread. 
Your threading package might not support preemptive multitasking for threads, for example, or you may be doing something compute-intensive and want to make sure -that the user-interface thread gets called frequently. Regardless, +that the user-interface thread gets called frequently. Regardless, there are times that you might want a thread to give up the processor. Perl's threading package provides the yield() function that does @@ -344,7 +352,7 @@ this. yield() is pretty straightforward, and works like this: =head2 Waiting For A Thread To Exit -Since threads are also subroutines, they can return values. To wait +Since threads are also subroutines, they can return values. To wait for a thread to exit and extract any scalars it might return, you can use the join() method. @@ -357,11 +365,11 @@ use the join() method. sub sub1 { return "Fifty-six", "foo", 2; } In the example above, the join() method returns as soon as the thread -ends. In addition to waiting for a thread to finish and gathering up +ends. In addition to waiting for a thread to finish and gathering up any values that the thread might have returned, join() also performs any OS cleanup necessary for the thread. That cleanup might be important, especially for long-running programs that spawn lots of -threads. If you don't want the return values and don't want to wait +threads. If you don't want the return values and don't want to wait for the thread to finish, you should call the detach() method instead. detach() is covered later in the article. @@ -369,7 +377,7 @@ instead. detach() is covered later in the article. So what happens when an error occurs in a thread? Any errors that could be caught with eval() are postponed until the thread is -joined. If your program never joins, the errors appear when your +joined. If your program never joins, the errors appear when your program exits. Errors deferred until a join() can be caught with eval(): @@ -389,13 +397,13 @@ to get them. 
=head2 Ignoring A Thread -join() does three things:it waits for a thread to exit, cleans up -after it, and returns any data the thread may have produced. But what +join() does three things: it waits for a thread to exit, cleans up +after it, and returns any data the thread may have produced. But what if you're not interested in the thread's return values, and you don't really care when the thread finishes? All you want is for the thread to get cleaned up after when it's done. -In this case, you use the detach() method. Once a thread is detached, +In this case, you use the detach() method. Once a thread is detached, it'll run until it's finished, then Perl will clean up after it automatically. @@ -421,29 +429,29 @@ lost. =head1 Threads And Data Now that we've covered the basics of threads, it's time for our next -topic: data. Threading introduces a couple of complications to data +topic: data. Threading introduces a couple of complications to data access that non-threaded programs never need to worry about. =head2 Shared And Unshared Data The single most important thing to remember when using threads is that all threads potentially have access to all the data anywhere in your -program. While this is true with a nonthreaded Perl program as well, +program. While this is true with a nonthreaded Perl program as well, it's especially important to remember with a threaded program, since more than one thread can be accessing this data at once. Perl's scoping rules don't change because you're using threads. If a subroutine (or block, in the case of async()) could see a variable if -you weren't running with threads, it can see it if you are. This is -especially important for the subroutines that create, and makes my -variables even more important. Remember--if your variables aren't -lexically scoped (declared with C<my>) you're probably sharing it between -threads. +you weren't running with threads, it can see it if you are. 
This is +especially important for the subroutines that create, and makes C<my> +variables even more important. Remember--if your variables aren't +lexically scoped (declared with C<my>) you're probably sharing them +between threads. =head2 Thread Pitfall: Races While threads bring a new set of useful tools, they also bring a -number of pitfalls. One pitfall is the race condition: +number of pitfalls. One pitfall is the race condition: use Thread; $a = 1; @@ -458,14 +466,14 @@ number of pitfalls. One pitfall is the race condition: What do you think $a will be? The answer, unfortunately, is "it depends." Both sub1() and sub2() access the global variable $a, once -to read and once to write. Depending on factors ranging from your +to read and once to write. Depending on factors ranging from your thread implementation's scheduling algorithm to the phase of the moon, $a can be 2 or 3. Race conditions are caused by unsynchronized access to shared -data. Without explicit synchronization, there's no way to be sure that +data. Without explicit synchronization, there's no way to be sure that nothing has happened to the shared data between the time you access it -and the time you update it. Even this simple code fragment has the +and the time you update it. Even this simple code fragment has the possibility of error: use Thread qw(async); @@ -473,8 +481,8 @@ possibility of error: async{ $b = $a; $a = $b + 1; }; async{ $c = $a; $a = $c + 1; }; -Two threads both access $a. Each thread can potentially be interrupted -at any point, or be executed in any order. At the end, $a could be 3 +Two threads both access $a. Each thread can potentially be interrupted +at any point, or be executed in any order. At the end, $a could be 3 or 4, and both $b and $c could be 2 or 3. Whenever your program accesses data or resources that can be accessed @@ -484,9 +492,9 @@ data corruption and race conditions. 
=head2 Controlling access: lock() The lock() function takes a variable (or subroutine, but we'll get to -that later) and puts a lock on it. No other thread may lock the +that later) and puts a lock on it. No other thread may lock the variable until the locking thread exits the innermost block containing -the lock. Using lock() is straightforward: +the lock. Using lock() is straightforward: use Thread qw(async); $a = 4; @@ -513,29 +521,29 @@ the lock. Using lock() is straightforward: print "\$a is $a\n"; lock() blocks the thread until the variable being locked is -available. When lock() returns, your thread can be sure that no other +available. When lock() returns, your thread can be sure that no other thread can lock that variable until the innermost block containing the lock exits. It's important to note that locks don't prevent access to the variable -in question, only lock attempts. This is in keeping with Perl's +in question, only lock attempts. This is in keeping with Perl's longstanding tradition of courteous programming, and the advisory file -locking that flock() gives you. Locked subroutines behave differently, -however. We'll cover that later in the article. +locking that flock() gives you. Locked subroutines behave differently, +however. We'll cover that later in the article. -You may lock arrays and hashes as well as scalars. Locking an array, +You may lock arrays and hashes as well as scalars. Locking an array, though, will not block subsequent locks on array elements, just lock attempts on the array itself. Finally, locks are recursive, which means it's okay for a thread to -lock a variable more than once. The lock will last until the outermost +lock a variable more than once. The lock will last until the outermost lock() on the variable goes out of scope. =head2 Thread Pitfall: Deadlocks -Locks are a handy tool to synchronize access to data. Using them -properly is the key to safe shared data. Unfortunately, locks aren't -without their dangers. 
Consider the following code: +Locks are a handy tool to synchronize access to data. Using them +properly is the key to safe shared data. Unfortunately, locks aren't +without their dangers. Consider the following code: use Thread qw(async yield); $a = 4; @@ -553,34 +561,34 @@ without their dangers. Consider the following code: lock ($a); }; -This program will probably hang until you kill it. The only way it +This program will probably hang until you kill it. The only way it won't hang is if one of the two async() routines acquires both locks -first. A guaranteed-to-hang version is more complicated, but the +first. A guaranteed-to-hang version is more complicated, but the principle is the same. The first thread spawned by async() will grab a lock on $a then, a -second or two later, try to grab a lock on $b. Meanwhile, the second -thread grabs a lock on $b, then later tries to grab a lock on $a. The +second or two later, try to grab a lock on $b. Meanwhile, the second +thread grabs a lock on $b, then later tries to grab a lock on $a. The second lock attempt for both threads will block, each waiting for the other to release its lock. This condition is called a deadlock, and it occurs whenever two or more threads are trying to get locks on resources that the others -own. Each thread will block, waiting for the other to release a lock -on a resource. That never happens, though, since the thread with the +own. Each thread will block, waiting for the other to release a lock +on a resource. That never happens, though, since the thread with the resource is itself waiting for a lock to be released. -There are a number of ways to handle this sort of problem. The best +There are a number of ways to handle this sort of problem. The best way is to always have all threads acquire locks in the exact same -order. If, for example, you lock variables $a, $b, and $c, always lock -$a before $b, and $b before $c. It's also best to hold on to locks for +order. 
If, for example, you lock variables $a, $b, and $c, always lock +$a before $b, and $b before $c. It's also best to hold on to locks for as short a period of time to minimize the risks of deadlock. =head2 Queues: Passing Data Around A queue is a special thread-safe object that lets you put data in one end and take it out the other without having to worry about -synchronization issues. They're pretty straightforward, and look like +synchronization issues. They're pretty straightforward, and look like this: use Thread qw(async); @@ -599,13 +607,13 @@ this: sleep 10; $DataQueue->enqueue(undef); -You create the queue with new Thread::Queue. Then you can add lists of +You create the queue with new Thread::Queue. Then you can add lists of scalars onto the end with enqueue(), and pop scalars off the front of -it with dequeue(). A queue has no fixed size, and can grow as needed +it with dequeue(). A queue has no fixed size, and can grow as needed to hold everything pushed on to it. If a queue is empty, dequeue() blocks until another thread enqueues -something. This makes queues ideal for event loops and other +something. This makes queues ideal for event loops and other communications between threads. =head1 Threads And Code @@ -617,10 +625,10 @@ entire subroutines. =head2 Semaphores: Synchronizing Data Access -Semaphores are a kind of generic locking mechanism. Unlike lock, which +Semaphores are a kind of generic locking mechanism. Unlike lock, which gets a lock on a particular scalar, Perl doesn't associate any particular thing with a semaphore so you can use them to control -access to anything you like. In addition, semaphores can allow more +access to anything you like. In addition, semaphores can allow more than one thread to access a resource at once, though by default semaphores only allow one thread access at a time. @@ -630,7 +638,7 @@ semaphores only allow one thread access at a time. Semaphores have two methods, down and up. 
down decrements the resource count, while up increments it. down calls will block if the -semaphore's current count would decrement below zero. This program +semaphore's current count would decrement below zero. This program gives a quick demonstration: use Thread qw(yield); @@ -659,20 +667,20 @@ gives a quick demonstration: } } -The three invocations of the subroutine all operate in sync. The +The three invocations of the subroutine all operate in sync. The semaphore, though, makes sure that only one thread is accessing the global variable at once. =item Advanced Semaphores By default, semaphores behave like locks, letting only one thread -down() them at a time. However, there are other uses for semaphores. +down() them at a time. However, there are other uses for semaphores. Each semaphore has a counter attached to it. down() decrements the -counter and up() increments the counter. By default, semaphores are +counter and up() increments the counter. By default, semaphores are created with the counter set to one, down() decrements by one, and -up() increments by one. If down() attempts to decrement the counter -below zero, it blocks until the counter is large enough. Note that +up() increments by one. If down() attempts to decrement the counter +below zero, it blocks until the counter is large enough. Note that while a semaphore can be created with a starting count of zero, any up() or down() always changes the counter by at least one. $semaphore->down(0) is the same as $semaphore->down(1). @@ -680,21 +688,21 @@ one. $semaphore->down(0) is the same as $semaphore->down(1). The question, of course, is why would you do something like this? Why create a semaphore with a starting count that's not one, or why decrement/increment it by more than one? The answer is resource -availability. Many resources that you want to manage access for can be +availability. Many resources that you want to manage access for can be safely used by more than one thread at once. 
-For example, let's take a GUI driven program. It has a semaphore that +For example, let's take a GUI driven program. It has a semaphore that it uses to synchronize access to the display, so only one thread is -ever drawing at once. Handy, but of course you don't want any thread -to start drawing until things are properly set up. In this case, you +ever drawing at once. Handy, but of course you don't want any thread +to start drawing until things are properly set up. In this case, you can create a semaphore with a counter set to zero, and up it when things are ready for drawing. Semaphores with counters greater than one are also useful for -establishing quotas. Say, for example, that you have a number of -threads that can do I/O at once. You don't want all the threads +establishing quotas. Say, for example, that you have a number of +threads that can do I/O at once. You don't want all the threads reading or writing at once though, since that can potentially swamp -your I/O channels, or deplete your process' quota of filehandles. You +your I/O channels, or deplete your process' quota of filehandles. You can use a semaphore initialized to the number of concurrent I/O requests (or open files) that you want at any one time, and have your threads quietly block and unblock themselves. @@ -707,14 +715,14 @@ thread needs to check out or return a number of resources at once. =head2 Attributes: Restricting Access To Subroutines In addition to synchronizing access to data or resources, you might -find it useful to synchronize access to subroutines. You may be +find it useful to synchronize access to subroutines. You may be accessing a singular machine resource (perhaps a vector processor), or find it easier to serialize calls to a particular subroutine than to have a set of locks and semaphores. -One of the additions to Perl 5.005 is subroutine attributes. The +One of the additions to Perl 5.005 is subroutine attributes. 
The Thread package uses these to provide several flavors of -serialization. It's important to remember that these attributes are +serialization. It's important to remember that these attributes are used in the compilation phase of your program so you can't change a subroutine's behavior while your program is actually running. @@ -722,14 +730,13 @@ subroutine's behavior while your program is actually running. The basic subroutine lock looks like this: - sub test_sub { - use attrs qw(locked); + sub test_sub :locked { } This ensures that only one thread will be executing this subroutine at -any one time. Once a thread calls this subroutine, any other thread +any one time. Once a thread calls this subroutine, any other thread that calls it will block until the thread in the subroutine exits -it. A more elaborate example looks like this: +it. A more elaborate example looks like this: use Thread qw(yield); @@ -738,8 +745,7 @@ it. A more elaborate example looks like this: new Thread \&thread_sub, 3; new Thread \&thread_sub, 4; - sub sync_sub { - use attrs qw(locked); + sub sync_sub :locked { my $CallingThread = shift @_; print "In sync_sub for thread $CallingThread\n"; yield; @@ -754,16 +760,16 @@ it. A more elaborate example looks like this: print "$ThreadID is done with sync_sub\n"; } -The use attrs qw(locked) locks sync_sub(), and if you run this, you -can see that only one thread is in it at any one time. +The C<locked> attribute tells perl to lock sync_sub(), and if you run +this, you can see that only one thread is in it at any one time. =head2 Methods Locking an entire subroutine can sometimes be overkill, especially -when dealing with Perl objects. When calling a method for an object, +when dealing with Perl objects. When calling a method for an object, for example, you want to serialize calls to a method, so that only one thread will be in the subroutine for a particular object, but threads -calling that subroutine for a different object aren't blocked. 
The +calling that subroutine for a different object aren't blocked. The method attribute indicates whether the subroutine is really a method. use Thread; @@ -793,8 +799,7 @@ method attribute indicates whether the subroutine is really a method. return bless [@_], $class; } - sub per_object { - use attrs qw(locked method); + sub per_object :locked :method { my ($class, $thrnum) = @_; print "In per_object for thread $thrnum\n"; yield; @@ -802,8 +807,7 @@ method attribute indicates whether the subroutine is really a method. print "Exiting per_object for thread $thrnum\n"; } - sub one_at_a_time { - use attrs qw(locked); + sub one_at_a_time :locked { my ($class, $thrnum) = @_; print "In one_at_a_time for thread $thrnum\n"; yield; @@ -817,25 +821,25 @@ thread is ever in one_at_a_time() at once. =head2 Locking A Subroutine -You can lock a subroutine as you would lock a variable. Subroutine -locks work the same as a C<use attrs qw(locked)> in the subroutine, +You can lock a subroutine as you would lock a variable. Subroutine locks +work the same as specifying a C<locked> attribute for the subroutine, and block all access to the subroutine for other threads until the -lock goes out of scope. When the subroutine isn't locked, any number +lock goes out of scope. When the subroutine isn't locked, any number of threads can be in it at once, and getting a lock on a subroutine -doesn't affect threads already in the subroutine. Getting a lock on a +doesn't affect threads already in the subroutine. Getting a lock on a subroutine looks like this: lock(\&sub_to_lock); -Simple enough. Unlike use attrs, which is a compile time option, -locking and unlocking a subroutine can be done at runtime at your -discretion. There is some runtime penalty to using lock(\&sub) instead -of use attrs qw(locked), so make sure you're choosing the proper +Simple enough. 
Unlike the C<locked> attribute, which is a compile time +option, locking and unlocking a subroutine can be done at runtime at your +discretion. There is some runtime penalty to using lock(\&sub) instead +of the C<locked> attribute, so make sure you're choosing the proper method to do the locking. You'd choose lock(\&sub) when writing modules and code to run on both threaded and unthreaded Perl, especially for code that will run on -5.004 or earlier Perls. In that case, it's useful to have subroutines +5.004 or earlier Perls. In that case, it's useful to have subroutines that should be serialized lock themselves if they're running threaded, like so: @@ -855,20 +859,20 @@ version of Perl you're running. We've covered the workhorse parts of Perl's threading package, and with these tools you should be well on your way to writing threaded -code and packages. There are a few useful little pieces that didn't +code and packages. There are a few useful little pieces that didn't really fit in anyplace else. =head2 What Thread Am I In? The Thread->self method provides your program with a way to get an -object representing the thread it's currently in. You can use this +object representing the thread it's currently in. You can use this object in the same way as the ones returned from the thread creation. =head2 Thread IDs tid() is a thread object method that returns the thread ID of the -thread the object represents. Thread IDs are integers, with the main -thread in a program being 0. Currently Perl assigns a unique tid to +thread the object represents. Thread IDs are integers, with the main +thread in a program being 0. Currently Perl assigns a unique tid to every thread ever created in your program, assigning the first thread to be created a tid of 1, and increasing the tid by 1 for each new thread that's created. @@ -881,7 +885,7 @@ if the objects represent the same thread, and false if they don't. =head2 What Threads Are Running? 
Thread->list returns a list of thread objects, one for each thread -that's currently running. Handy for a number of things, including +that's currently running. Handy for a number of things, including cleaning up at the end of your program: # Loop through all the threads @@ -892,14 +896,14 @@ cleaning up at the end of your program: } } -The example above is just for illustration. It isn't strictly +The example above is just for illustration. It isn't strictly necessary to join all the threads you create, since Perl detaches all the threads before it exits. =head1 A Complete Example Confused yet? It's time for an example program to show some of the -things we've covered. This program finds prime numbers using threads. +things we've covered. This program finds prime numbers using threads. 1 #!/usr/bin/perl -w 2 # prime-pthread, courtesy of Tom Christiansen @@ -936,12 +940,12 @@ things we've covered. This program finds prime numbers using threads. 33 $kid->join() if $kid; 34 } -This program uses the pipeline model to generate prime numbers. Each +This program uses the pipeline model to generate prime numbers. Each thread in the pipeline has an input queue that feeds numbers to be checked, a prime number that it's responsible for, and an output queue -that it funnels numbers that have failed the check into. If the thread +that it funnels numbers that have failed the check into. If the thread has a number that's failed its check and there's no child thread, then -the thread must have found a new prime number. In that case, a new +the thread must have found a new prime number. In that case, a new child thread is created for that prime and stuck on the end of the pipeline. @@ -952,20 +956,20 @@ number is, it's a number that's only evenly divisible by itself and 1) The bulk of the work is done by the check_num() subroutine, which takes a reference to its input queue and a prime number that it's -responsible for. 
After pulling in the input queue and the prime that +responsible for. After pulling in the input queue and the prime that the subroutine's checking (line 20), we create a new queue (line 22) and reserve a scalar for the thread that we're likely to create later (line 21). The while loop from lines 23 to line 31 grabs a scalar off the input queue and checks against the prime this thread is responsible -for. Line 24 checks to see if there's a remainder when we modulo the -number to be checked against our prime. If there is one, the number +for. Line 24 checks to see if there's a remainder when we modulo the +number to be checked against our prime. If there is one, the number must not be evenly divisible by our prime, so we need to either pass it on to the next thread if we've created one (line 26) or create a new thread if we haven't. -The new thread creation is line 29. We pass on to it a reference to +The new thread creation is line 29. We pass on to it a reference to the queue we've created, and the prime number we've found. Finally, once the loop terminates (because we got a 0 or undef in the @@ -975,18 +979,18 @@ child and wait for it to exit if we've created a child (Lines 32 and Meanwhile, back in the main thread, we create a queue (line 9) and the initial child thread (line 10), and pre-seed it with the first prime: -2. Then we queue all the numbers from 3 to 1000 for checking (lines +2. Then we queue all the numbers from 3 to 1000 for checking (lines 12-14), then queue a die notice (line 16) and wait for the first child -thread to terminate (line 17). Because a child won't die until its +thread to terminate (line 17). Because a child won't die until its child has died, we know that we're done once we return from the join. -That's how it works. It's pretty simple; as with many Perl programs, +That's how it works. It's pretty simple; as with many Perl programs, the explanation is much longer than the program. 
=head1 Conclusion A complete thread tutorial could fill a book (and has, many times), -but this should get you well on your way. The final authority on how +but this should get you well on your way. The final authority on how Perl's threads behave is the documentation bundled with the Perl distribution, but with what we've covered in this article, you should be well on your way to becoming a threaded Perl expert. @@ -1046,7 +1050,7 @@ France, September 1992, Yves Bekkers and Jacques Cohen, eds. Springer, Thanks (in no particular order) to Chaim Frenkel, Steve Fink, Gurusamy Sarathy, Ilya Zakharevich, Benjamin Sugars, Jürgen Christoffel, Joshua Pritikin, and Alan Burlison, for their help in reality-checking and -polishing this article. Big thanks to Tom Christiansen for his rewrite +polishing this article. Big thanks to Tom Christiansen for his rewrite of the prime number generator. =head1 AUTHOR diff --git a/contrib/perl5/pod/perltie.pod b/contrib/perl5/pod/perltie.pod index 6652658..c835738 100644 --- a/contrib/perl5/pod/perltie.pod +++ b/contrib/perl5/pod/perltie.pod @@ -184,11 +184,13 @@ methods: TIEARRAY, FETCH, STORE, FETCHSIZE, STORESIZE and perhaps DESTROY. FETCHSIZE and STORESIZE are used to provide C<$#array> and equivalent C<scalar(@array)> access. - -The methods POP, PUSH, SHIFT, UNSHIFT, SPLICE are required if the perl -operator with the corresponding (but lowercase) name is to operate on the -tied array. The B<Tie::Array> class can be used as a base class to implement -these in terms of the basic five methods above. + +The methods POP, PUSH, SHIFT, UNSHIFT, SPLICE, DELETE, and EXISTS are +required if the perl operator with the corresponding (but lowercase) name +is to operate on the tied array. The B<Tie::Array> class can be used as a +base class to implement the first five of these in terms of the basic +methods above. The default implementations of DELETE and EXISTS in +B<Tie::Array> simply C<croak>. 
In addition EXTEND will be called when perl would have pre-extended allocation in a real array. @@ -621,7 +623,9 @@ This is partially implemented now. A class implementing a tied filehandle should define the following methods: TIEHANDLE, at least one of PRINT, PRINTF, WRITE, READLINE, GETC, -READ, and possibly CLOSE and DESTROY. +READ, and possibly CLOSE and DESTROY. The class can also provide: BINMODE, +OPEN, EOF, FILENO, SEEK, TELL - if the corresponding perl operators are +used on the handle. It is especially useful when perl is embedded in some other program, where output to STDOUT and STDERR may have to be redirected in some @@ -739,6 +743,7 @@ a scalar. package Remember; use strict; + use warnings; use IO::File; sub TIESCALAR { @@ -832,7 +837,7 @@ destructor (DESTROY) is called, which is normal for objects that have no more valid references; and thus the file is closed. In the second example, however, we have stored another reference to -the tied object in C<$x>. That means that when untie() gets called +the tied object in $x. That means that when untie() gets called there will still be a valid reference to the object in existence, so the destructor is not called at that time, and thus the file is not closed. The reason there is no output is because the file buffers @@ -841,7 +846,8 @@ have not been flushed to disk. Now that you know what the problem is, what can you do to avoid it? Well, the good old C<-w> flag will spot any instances where you call untie() and there are still valid references to the tied object. 
If -the second script above is run with the C<-w> flag, Perl prints this +the second script above this near the top C<use warnings 'untie'> +or was run with the C<-w> flag, Perl prints this warning message: untie attempted while 1 inner references still exist diff --git a/contrib/perl5/pod/perltoc.pod b/contrib/perl5/pod/perltoc.pod index 9dc0b36..798a24d 100644 --- a/contrib/perl5/pod/perltoc.pod +++ b/contrib/perl5/pod/perltoc.pod @@ -13,16 +13,23 @@ through to locate the proper section you're looking for. =head2 perl - Practical Extraction and Report Language +=over + =item SYNOPSIS =item DESCRIPTION -Many usability enhancements, Simplified grammar, Lexical scoping, -Arbitrarily nested data structures, Modularity and reusability, -Object-oriented programming, Embeddable and Extensible, POSIX compliant, -Package constructors and destructors, Multiple simultaneous DBM -implementations, Subroutine definitions may now be autoloaded, Regular -expression enhancements, Innumerable Unbundled Modules, Compilability +modularity and reusability using innumerable modules, embeddable and +extensible, roll-your-own magic variables (including multiple simultaneous +DBM implementations), subroutines can now be overridden, autoloaded, and +prototyped, arbitrarily nested data structures and anonymous functions, +object-oriented programming, compilability into C code or Perl bytecode, +support for light-weight processes (threads), support for +internationalization, localization, and Unicode, lexical scoping, regular +expression enhancements, enhanced debugger and interactive Perl +environment, with integrated editor support, POSIX 1003.1 compliant library + +=item AVAILABILITY =item ENVIRONMENT @@ -38,16 +45,231 @@ expression enhancements, Innumerable Unbundled Modules, Compilability =item NOTES -=head2 perlfaq - frequently asked questions about Perl ($Date: 1998/07/20 -23:12:17 $) +=back + +=head2 perlfaq - frequently asked questions about Perl ($Date: 1999/05/23 +20:38:02 $) + 
+=over =item DESCRIPTION perlfaq: Structural overview of the FAQ, L<perlfaq1>: General Questions -About Perl, L<perlfaq2>: Obtaining and Learning about Perl, L<perlfaq3>: -Programming Tools, L<perlfaq4>: Data Manipulation, L<perlfaq5>: Files and -Formats, L<perlfaq6>: Regexps, L<perlfaq7>: General Perl Language Issues, -L<perlfaq8>: System Interaction, L<perlfaq9>: Networking +About Perl, What is Perl?, Who supports Perl? Who develops it? Why is it +free?, Which version of Perl should I use?, What are perl4 and perl5?, What +is perl6?, How stable is Perl?, Is Perl difficult to learn?, How does Perl +compare with other languages like Java, Python, REXX, Scheme, or Tcl?, Can +I do [task] in Perl?, When shouldn't I program in Perl?, What's the +difference between "perl" and "Perl"?, Is it a Perl program or a Perl +script?, What is a JAPH?, Where can I get a list of Larry Wall witticisms?, +How can I convince my sysadmin/supervisor/employees to use version +(5/5.005/Perl instead of some other language)?, L<perlfaq2>: Obtaining and +Learning about Perl, What machines support Perl? Where do I get it?, How +can I get a binary version of Perl?, I don't have a C compiler on my +system. How can I compile perl?, I copied the Perl binary from one machine +to another, but scripts don't work, I grabbed the sources and tried to +compile but gdbm/dynamic loading/malloc/linking/... failed. How do I make +it work?, What modules and extensions are available for Perl? What is +CPAN? What does CPAN/src/... mean?, Is there an ISO or ANSI certified +version of Perl?, Where can I get information on Perl?, What are the Perl +newsgroups on USENET? 
Where do I post questions?, Where should I post +source code?, Perl Books, Perl in Magazines, Perl on the Net: FTP and WWW +Access, What mailing lists are there for perl?, Archives of +comp.lang.perl.misc, Where can I buy a commercial version of Perl?, Where +do I send bug reports?, What is perl.com?, L<perlfaq3>: Programming Tools, +How do I do (anything)?, How can I use Perl interactively?, Is there a Perl +shell?, How do I debug my Perl programs?, How do I profile my Perl +programs?, How do I cross-reference my Perl programs?, Is there a +pretty-printer (formatter) for Perl?, Is there a ctags for Perl?, Is there +an IDE or Windows Perl Editor?, Where can I get Perl macros for vi?, Where +can I get perl-mode for emacs?, How can I use curses with Perl?, How can I +use X or Tk with Perl?, How can I generate simple menus without using CGI +or Tk?, What is undump?, How can I make my Perl program run faster?, How +can I make my Perl program take less memory?, Is it unsafe to return a +pointer to local data?, How can I free an array or hash so my program +shrinks?, How can I make my CGI script more efficient?, How can I hide the +source for my Perl program?, How can I compile my Perl program into byte +code or C?, How can I compile Perl into Java?, How can I get C<#!perl> to +work on [MS-DOS,NT,...]?, Can I write useful perl programs on the command +line?, Why don't perl one-liners work on my DOS/Mac/VMS system?, Where can +I learn about CGI or Web programming in Perl?, Where can I learn about +object-oriented Perl programming?, Where can I learn about linking C with +Perl? [h2xs, xsubpp], I've read perlembed, perlguts, etc., but I can't +embed perl in my C program, what am I doing wrong?, When I tried to run my +script, I got this message. 
What does it mean?, What's MakeMaker?, +L<perlfaq4>: Data Manipulation, Why am I getting long decimals (eg, +19.9499999999999) instead of the numbers I should be getting (eg, 19.95)?, +Why isn't my octal data interpreted correctly?, Does Perl have a round() +function? What about ceil() and floor()? Trig functions?, How do I +convert bits into ints?, Why doesn't & work the way I want it to?, How do I +multiply matrices?, How do I perform an operation on a series of integers?, +How can I output Roman numerals?, Why aren't my random numbers random?, How +do I find the week-of-the-year/day-of-the-year?, How do I find the current +century or millennium?, How can I compare two dates and find the +difference?, How can I take a string and turn it into epoch seconds?, How +can I find the Julian Day?, How do I find yesterday's date?, Does Perl have +a year 2000 problem? Is Perl Y2K compliant?, How do I validate input?, How +do I unescape a string?, How do I remove consecutive pairs of characters?, +How do I expand function calls in a string?, How do I find matching/nesting +anything?, How do I reverse a string?, How do I expand tabs in a string?, +How do I reformat a paragraph?, How can I access/change the first N letters +of a string?, How do I change the Nth occurrence of something?, How can I +count the number of occurrences of a substring within a string?, How do I +capitalize all the words on one line?, How can I split a [character] +delimited string except when inside [character]? 
(Comma-separated files), +How do I strip blank space from the beginning/end of a string?, How do I +pad a string with blanks or pad a number with zeroes?, How do I extract +selected columns from a string?, How do I find the soundex value of a +string?, How can I expand variables in text strings?, What's wrong with +always quoting "$vars"?, Why don't my <<HERE documents work?, What is the +difference between a list and an array?, What is the difference between +$array[1] and @array[1]?, How can I remove duplicate elements from a list +or array?, How can I tell whether a list or array contains a certain +element?, How do I compute the difference of two arrays? How do I compute +the intersection of two arrays?, How do I test whether two arrays or hashes +are equal?, How do I find the first array element for which a condition is +true?, How do I handle linked lists?, How do I handle circular lists?, How +do I shuffle an array randomly?, How do I process/modify each element of an +array?, How do I select a random element from an array?, How do I permute N +elements of a list?, How do I sort an array by (anything)?, How do I +manipulate arrays of bits?, Why does defined() return true on empty arrays +and hashes?, How do I process an entire hash?, What happens if I add or +remove keys from a hash while iterating over it?, How do I look up a hash +element by value?, How can I know how many entries are in a hash?, How do I +sort a hash (optionally by value instead of key)?, How can I always keep my +hash sorted?, What's the difference between "delete" and "undef" with +hashes?, Why don't my tied hashes make the defined/exists distinction?, How +do I reset an each() operation part-way through?, How can I get the unique +keys from two hashes?, How can I store a multidimensional array in a DBM +file?, How can I make my hash remember the order I put elements into it?, +Why does passing a subroutine an undefined element in a hash create it?, +How can I make the Perl equivalent 
of a C structure/C++ class/hash or array +of hashes or arrays?, How can I use a reference as a hash key?, How do I +handle binary data correctly?, How do I determine whether a scalar is a +number/whole/integer/float?, How do I keep persistent data across program +calls?, How do I print out or copy a recursive data structure?, How do I +define methods for every class/object?, How do I verify a credit card +checksum?, How do I pack arrays of doubles or floats for XS code?, +L<perlfaq5>: Files and Formats, How do I flush/unbuffer an output +filehandle? Why must I do this?, How do I change one line in a file/delete +a line in a file/insert a line in the middle of a file/append to the +beginning of a file?, How do I count the number of lines in a file?, How do +I make a temporary file name?, How can I manipulate fixed-record-length +files?, How can I make a filehandle local to a subroutine? How do I pass +filehandles between subroutines? How do I make an array of filehandles?, +How can I use a filehandle indirectly?, How can I set up a footer format to +be used with write()?, How can I write() into a string?, How can I output +my numbers with commas added?, How can I translate tildes (~) in a +filename?, How come when I open a file read-write it wipes it out?, Why do +I sometimes get an "Argument list too long" when I use <*>?, Is there a +leak/bug in glob()?, How can I open a file with a leading ">" or trailing +blanks?, How can I reliably rename a file?, How can I lock a file?, Why +can't I just open(FH, ">file.lock")?, I still don't get locking. I just +want to increment the number in the file. How can I do this?, How do I +randomly update a binary file?, How do I get a file's timestamp in perl?, +How do I set a file's timestamp in perl?, How do I print to more than one +file at once?, How can I read in an entire file all at once?, How can I +read in a file by paragraphs?, How can I read a single character from a +file? 
From the keyboard?, How can I tell whether there's a character +waiting on a filehandle?, How do I do a C<tail -f> in perl?, How do I dup() +a filehandle in Perl?, How do I close a file descriptor by number?, Why +can't I use "C:\temp\foo" in DOS paths? Why doesn't `C:\temp\foo.exe` +work?, Why doesn't glob("*.*") get all the files?, Why does Perl let me +delete read-only files? Why does C<-i> clobber protected files? Isn't +this a bug in Perl?, How do I select a random line from a file?, Why do I +get weird spaces when I print an array of lines?, L<perlfaq6>: Regexps, How +can I hope to use regular expressions without creating illegible and +unmaintainable code?, I'm having trouble matching over more than one line. +What's wrong?, How can I pull out lines between two patterns that are +themselves on different lines?, I put a regular expression into $/ but it +didn't work. What's wrong?, How do I substitute case insensitively on the +LHS, but preserving case on the RHS?, How can I make C<\w> match national +character sets?, How can I match a locale-smart version of C</[a-zA-Z]/>?, +How can I quote a variable to use in a regex?, What is C</o> really for?, +How do I use a regular expression to strip C style comments from a file?, +Can I use Perl regular expressions to match balanced text?, What does it +mean that regexes are greedy? How can I get around it?, How do I process +each word on each line?, How can I print out a word-frequency or +line-frequency summary?, How can I do approximate matching?, How do I +efficiently match many regular expressions at once?, Why don't +word-boundary searches with C<\b> work for me?, Why does using $&, $`, or +$' slow my program down?, What good is C<\G> in a regular expression?, Are +Perl regexes DFAs or NFAs? 
Are they POSIX compliant?, What's wrong with +using grep or map in a void context?, How can I match strings with +multibyte characters?, How do I match a pattern that is supplied by the +user?, L<perlfaq7>: General Perl Language Issues, Can I get a BNF/yacc/RE +for the Perl language?, What are all these $@%&* punctuation signs, and how +do I know when to use them?, Do I always/never have to quote my strings or +use semicolons and commas?, How do I skip some return values?, How do I +temporarily block warnings?, What's an extension?, Why do Perl operators +have different precedence than C operators?, How do I declare/create a +structure?, How do I create a module?, How do I create a class?, How can I +tell if a variable is tainted?, What's a closure?, What is variable suicide +and how can I prevent it?, How can I pass/return a {Function, FileHandle, +Array, Hash, Method, Regex}?, How do I create a static variable?, What's +the difference between dynamic and lexical (static) scoping? Between +local() and my()?, How can I access a dynamic variable while a similarly +named lexical is in scope?, What's the difference between deep and shallow +binding?, Why doesn't "my($foo) = <FILE>;" work right?, How do I redefine a +builtin function, operator, or method?, What's the difference between +calling a function as &foo and foo()?, How do I create a switch or case +statement?, How can I catch accesses to undefined +variables/functions/methods?, Why can't a method included in this same file +be found?, How can I find out my current package?, How can I comment out a +large block of perl code?, How do I clear a package?, How can I use a +variable as a variable name?, L<perlfaq8>: System Interaction, How do I +find out which operating system I'm running under?, How come exec() doesn't +return?, How do I do fancy stuff with the keyboard/screen/mouse?, How do I +print something out in color?, How do I read just one key without waiting +for a return key?, How do I check whether 
input is ready on the keyboard?, +How do I clear the screen?, How do I get the screen size?, How do I ask the +user for a password?, How do I read and write the serial port?, How do I +decode encrypted password files?, How do I start a process in the +background?, How do I trap control characters/signals?, How do I modify the +shadow password file on a Unix system?, How do I set the time and date?, +How can I sleep() or alarm() for under a second?, How can I measure time +under a second?, How can I do an atexit() or setjmp()/longjmp()? (Exception +handling), Why doesn't my sockets program work under System V (Solaris)? +What does the error message "Protocol not supported" mean?, How can I call +my system's unique C functions from Perl?, Where do I get the include files +to do ioctl() or syscall()?, Why do setuid perl scripts complain about +kernel problems?, How can I open a pipe both to and from a command?, Why +can't I get the output of a command with system()?, How can I capture +STDERR from an external command?, Why doesn't open() return an error when a +pipe open fails?, What's wrong with using backticks in a void context?, How +can I call backticks without shell processing?, Why can't my script read +from STDIN after I gave it EOF (^D on Unix, ^Z on MS-DOS)?, How can I +convert my shell script to perl?, Can I use perl to run a telnet or ftp +session?, How can I write expect in Perl?, Is there a way to hide perl's +command line from programs such as "ps"?, I {changed directory, modified my +environment} in a perl script. How come the change disappeared when I +exited the script? 
How do I get my changes to be visible?, How do I close +a process's filehandle without waiting for it to complete?, How do I fork a +daemon process?, How do I make my program run with sh and csh?, How do I +find out if I'm running interactively or not?, How do I timeout a slow +event?, How do I set CPU limits?, How do I avoid zombies on a Unix system?, +How do I use an SQL database?, How do I make a system() exit on control-C?, +How do I open a file without blocking?, How do I install a module from +CPAN?, What's the difference between require and use?, How do I keep my own +module/library directory?, How do I add the directory my program lives in +to the module/library search path?, How do I add a directory to my include +path at runtime?, What is socket.ph and where do I get it?, L<perlfaq9>: +Networking, My CGI script runs from the command line but not the browser. +(500 Server Error), How can I get better error messages from a CGI +program?, How do I remove HTML from a string?, How do I extract URLs?, How +do I download a file from the user's machine? How do I open a file on +another machine?, How do I make a pop-up menu in HTML?, How do I fetch an +HTML file?, How do I automate an HTML form submission?, How do I decode or +create those %-encodings on the web?, How do I redirect to another page?, +How do I put a password on my web pages?, How do I edit my .htpasswd and +.htgroup files with Perl?, How do I make sure users can't enter values into +a form that cause my CGI script to do bad things?, How do I parse a mail +header?, How do I decode a CGI form?, How do I check a valid mail address?, +How do I decode a MIME/BASE64 string?, How do I return the user's mail +address?, How do I send mail?, How do I read mail?, How do I find out my +hostname/domainname/IP address?, How do I fetch a news article or the +active newsgroups?, How do I fetch/put an FTP file?, How can I do RPC in +Perl? 
=over @@ -74,11 +296,15 @@ authors =item Changes -24/April/97, 23/April/97, 25/March/97, 18/March/97, 17/March/97 Version, -Initial Release: 11/March/97 +23/May/99, 13/April/99, 7/January/99, 22/June/98, 24/April/97, 23/April/97, +25/March/97, 18/March/97, 17/March/97 Version, Initial Release: 11/March/97 + +=back + +=head2 perlfaq1 - General Questions About Perl ($Revision: 1.23 $, $Date: +1999/05/23 16:08:30 $) -=head2 perlfaq1 - General Questions About Perl ($Revision: 1.14 $, $Date: -1998/06/14 22:15:25 $) +=over =item DESCRIPTION @@ -92,6 +318,8 @@ Initial Release: 11/March/97 =item What are perl4 and perl5? +=item What is perl6? + =item How stable is Perl? =item Is Perl difficult to learn? @@ -112,14 +340,18 @@ Scheme, or Tcl? =item Where can I get a list of Larry Wall witticisms? =item How can I convince my sysadmin/supervisor/employees to use version -(5/5.004/Perl instead of some other language)? +(5/5.005/Perl instead of some other language)? =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.24 $, -$Date: 1998/07/20 23:40:28 $) +=back + +=head2 perlfaq2 - Obtaining and Learning about Perl ($Revision: 1.32 $, +$Date: 1999/10/14 18:46:09 $) + +=over =item DESCRIPTION @@ -137,30 +369,26 @@ don't work. =item I grabbed the sources and tried to compile but gdbm/dynamic loading/malloc/linking/... failed. How do I make it work? -=item What modules and extensions are available for Perl? What is CPAN? +=item What modules and extensions are available for Perl? What is CPAN? What does CPAN/src/... mean? =item Is there an ISO or ANSI certified version of Perl? =item Where can I get information on Perl? -=item What are the Perl newsgroups on USENET? Where do I post questions? +=item What are the Perl newsgroups on Usenet? Where do I post questions? =item Where should I post source code? =item Perl Books -References, Tutorials -*Learning Perl [2nd edition] -by Randal L. 
Schwartz and Tom Christiansen, Task-Oriented, Special Topics +References, Tutorials, Task-Oriented, Special Topics =item Perl in Magazines =item Perl on the Net: FTP and WWW Access -=item What mailing lists are there for perl? - -MacPerl, Perl5-Porters, NTPerl, Perl-Packrats +=item What mailing lists are there for Perl? =item Archives of comp.lang.perl.misc @@ -168,16 +396,18 @@ MacPerl, Perl5-Porters, NTPerl, Perl-Packrats =item Where do I send bug reports? -=item What is perl.com? perl.org? The Perl Institute? - -=item How do I learn about object-oriented Perl programming? +=item What is perl.com? Perl Mongers? pm.org? perl.org? =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq3 - Programming Tools ($Revision: 1.28 $, $Date: 1998/07/16 -22:08:49 $) +=back + +=head2 perlfaq3 - Programming Tools ($Revision: 1.38 $, $Date: 1999/05/23 +16:08:30 $) + +=over =item DESCRIPTION @@ -199,6 +429,8 @@ MacPerl, Perl5-Porters, NTPerl, Perl-Packrats =item Is there a ctags for Perl? +=item Is there an IDE or Windows Perl Editor? + =item Where can I get Perl macros for vi? =item Where can I get perl-mode for emacs? @@ -225,11 +457,13 @@ MacPerl, Perl5-Porters, NTPerl, Perl-Packrats =item How can I compile my Perl program into byte code or C? +=item How can I compile Perl into Java? + =item How can I get C<#!perl> to work on [MS-DOS,NT,...]? -=item Can I write useful perl programs on the command line? +=item Can I write useful Perl programs on the command line? -=item Why don't perl one-liners work on my DOS/Mac/VMS system? +=item Why don't Perl one-liners work on my DOS/Mac/VMS system? =item Where can I learn about CGI or Web programming in Perl? @@ -249,8 +483,12 @@ mean? =item AUTHOR AND COPYRIGHT -=head2 perlfaq4 - Data Manipulation ($Revision: 1.25 $, $Date: 1998/07/16 -22:49:55 $) +=back + +=head2 perlfaq4 - Data Manipulation ($Revision: 1.49 $, $Date: 1999/05/23 +20:37:49 $) + +=over =item DESCRIPTION @@ -263,11 +501,13 @@ numbers I should be getting (eg, 19.95)? 
=item Why isn't my octal data interpreted correctly? -=item Does perl have a round function? What about ceil() and floor()? +=item Does Perl have a round() function? What about ceil() and floor()? Trig functions? =item How do I convert bits into ints? +=item Why doesn't & work the way I want it to? + =item How do I multiply matrices? =item How do I perform an operation on a series of integers? @@ -284,13 +524,17 @@ Trig functions? =item How do I find the week-of-the-year/day-of-the-year? +=item How do I find the current century or millennium? + =item How can I compare two dates and find the difference? =item How can I take a string and turn it into epoch seconds? =item How can I find the Julian Day? -=item Does Perl have a year 2000 problem? Is Perl Y2K compliant? +=item How do I find yesterday's date? + +=item Does Perl have a Year 2000 problem? Is Perl Y2K compliant? =back @@ -328,6 +572,8 @@ string? =item How do I strip blank space from the beginning/end of a string? +=item How do I pad a string with blanks or pad a number with zeroes? + =item How do I extract selected columns from a string? =item How do I find the soundex value of a string? @@ -348,9 +594,11 @@ the tag =over +=item What is the difference between a list and an array? + =item What is the difference between $array[1] and @array[1]? -=item How can I extract just the unique elements of an array? +=item How can I remove duplicate elements from a list or array? a) If @in is sorted, and you want @out to be sorted:(this assumes all true values in the array), b) If you don't know whether @in is sorted:, c) Like @@ -363,6 +611,8 @@ integers: =item How do I compute the difference of two arrays? How do I compute the intersection of two arrays? +=item How do I test whether two arrays or hashes are equal? + =item How do I find the first array element for which a condition is true? =item How do I handle linked lists? @@ -440,12 +690,18 @@ array of hashes or arrays? =item How do I verify a credit card checksum? 
+=item How do I pack arrays of doubles or floats for XS code? + =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq5 - Files and Formats ($Revision: 1.24 $, $Date: 1998/07/05 -15:07:20 $) +=back + +=head2 perlfaq5 - Files and Formats ($Revision: 1.38 $, $Date: 1999/05/23 +16:08:30 $) + +=over =item DESCRIPTION @@ -481,13 +737,13 @@ filehandles between subroutines? How do I make an array of filehandles? =item Is there a leak/bug in glob()? -=item How can I open a file with a leading "E<gt>" or trailing blanks? +=item How can I open a file with a leading ">" or trailing blanks? =item How can I reliably rename a file? =item How can I lock a file? -=item What can't I just open(FH, ">file.lock")? +=item Why can't I just open(FH, ">file.lock")? =item I still don't get locking. I just want to increment the number in the file. How can I do this? @@ -500,11 +756,13 @@ the file. How can I do this? =item How do I print to more than one file at once? +=item How can I read in an entire file all at once? + =item How can I read in a file by paragraphs? =item How can I read a single character from a file? From the keyboard? -=item How can I tell if there's a character waiting on a filehandle? +=item How can I tell whether there's a character waiting on a filehandle? =item How do I do a C<tail -f> in perl? @@ -522,11 +780,17 @@ protected files? Isn't this a bug in Perl? =item How do I select a random line from a file? +=item Why do I get weird spaces when I print an array of lines? + =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq6 - Regexps ($Revision: 1.22 $, $Date: 1998/07/16 14:01:07 $) +=back + +=head2 perlfaq6 - Regexes ($Revision: 1.27 $, $Date: 1999/05/23 16:08:30 $) + +=over =item DESCRIPTION @@ -535,8 +799,7 @@ protected files? Isn't this a bug in Perl? =item How can I hope to use regular expressions without creating illegible and unmaintainable code? 
-Comments Outside the Regexp, Comments Inside the Regexp, Different -Delimiters +Comments Outside the Regex, Comments Inside the Regex, Different Delimiters =item I'm having trouble matching over more than one line. What's wrong? @@ -552,7 +815,7 @@ case on the RHS? =item How can I match a locale-smart version of C</[a-zA-Z]/>? -=item How can I quote a variable to use in a regexp? +=item How can I quote a variable to use in a regex? =item What is C</o> really for? @@ -561,7 +824,7 @@ file? =item Can I use Perl regular expressions to match balanced text? -=item What does it mean that regexps are greedy? How can I get around it? +=item What does it mean that regexes are greedy? How can I get around it? =item How do I process each word on each line? @@ -577,18 +840,24 @@ file? =item What good is C<\G> in a regular expression? -=item Are Perl regexps DFAs or NFAs? Are they POSIX compliant? +=item Are Perl regexes DFAs or NFAs? Are they POSIX compliant? =item What's wrong with using grep or map in a void context? =item How can I match strings with multibyte characters? +=item How do I match a pattern that is supplied by the user? + =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq7 - Perl Language Issues ($Revision: 1.21 $, $Date: -1998/06/22 15:20:07 $) +=back + +=head2 perlfaq7 - Perl Language Issues ($Revision: 1.28 $, $Date: +1999/05/23 20:36:18 $) + +=over =item DESCRIPTION @@ -596,7 +865,7 @@ file? =item Can I get a BNF/yacc/RE for the Perl language? -=item What are all these $@%* punctuation signs, and how do I know when to +=item What are all these $@%&* punctuation signs, and how do I know when to use them? =item Do I always/never have to quote my strings or use semicolons and @@ -623,14 +892,14 @@ commas? =item What is variable suicide and how can I prevent it? =item How can I pass/return a {Function, FileHandle, Array, Hash, Method, -Regexp}? +Regex}? 
-Passing Variables and Functions, Passing Filehandles, Passing Regexps, +Passing Variables and Functions, Passing Filehandles, Passing Regexes, Passing Methods =item How do I create a static variable? -=item What's the difference between dynamic and lexical (static) scoping? +=item What's the difference between dynamic and lexical (static) scoping? Between local() and my()? =item How can I access a dynamic variable while a similarly named lexical @@ -654,12 +923,20 @@ is in scope? =item How can I comment out a large block of perl code? +=item How do I clear a package? + +=item How can I use a variable as a variable name? + =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq8 - System Interaction ($Revision: 1.25 $, $Date: 1998/07/05 -15:07:20 $) +=back + +=head2 perlfaq8 - System Interaction ($Revision: 1.39 $, $Date: 1999/05/23 +18:37:57 $) + +=over =item DESCRIPTION @@ -767,7 +1044,7 @@ complete? =item How do I open a file without blocking? -=item How do I install a CPAN module? +=item How do I install a module from CPAN? =item What's the difference between require and use? @@ -778,13 +1055,19 @@ search path? =item How do I add a directory to my include path at runtime? +=item What is socket.ph and where do I get it? + =back =item AUTHOR AND COPYRIGHT -=head2 perlfaq9 - Networking ($Revision: 1.20 $, $Date: 1998/06/22 18:31:09 +=back + +=head2 perlfaq9 - Networking ($Revision: 1.26 $, $Date: 1999/05/23 16:08:30 $) +=over + =item DESCRIPTION =over @@ -844,181 +1127,402 @@ CGI script to do bad things? =item AUTHOR AND COPYRIGHT -=head2 perldelta - what's new for perl5.005 +=back -=item DESCRIPTION +=head2 perldelta - what's new for perl v5.6.0 -=item About the new versioning system +=over -=item Incompatible Changes +=item DESCRIPTION + +=item Core Enhancements =over -=item WARNING: This version is not binary compatible with Perl 5.004. 
+=item Interpreter cloning, threads, and concurrency + +=item Lexically scoped warning categories + +=item Unicode and UTF-8 support + +=item Support for interpolating named characters + +=item "our" declarations + +=item Support for strings represented as a vector of ordinals + +=item Improved Perl version numbering system + +=item New syntax for declaring subroutine attributes + +=item File and directory handles can be autovivified + +=item open() with more than two arguments + +=item 64-bit support + +=item Large file support + +=item Long doubles + +=item "more bits" + +=item Enhanced support for sort() subroutines + +=item C<sort $coderef @foo> allowed + +=item File globbing implemented internally + +Support for CHECK blocks + +=item POSIX character class syntax [: :] supported + +Better pseudo-random number generator + +=item Improved C<qw//> operator + +Better worst-case behavior of hashes + +=item pack() format 'Z' supported + +=item pack() format modifier '!' supported + +=item pack() and unpack() support counted strings + +=item Comments in pack() templates + +=item Weak references + +=item Binary numbers supported + +=item Lvalue subroutines + +=item Some arrows may be omitted in calls through references + +=item Boolean assignment operators are legal lvalues + +=item exists() is supported on subroutine names + +=item exists() and delete() are supported on array elements + +=item Pseudo-hashes work better + +=item Automatic flushing of output buffers -=item Default installation structure has changed +=item Better diagnostics on meaningless filehandle operations -=item Perl Source Compatibility +=item Where possible, buffered data discarded from duped input filehandle -=item C Source Compatibility +=item eof() has the same old magic as <> -Core sources now require ANSI C compiler, All Perl global variables must -now be referenced with an explicit prefix, Enabling threads has source -compatibility issues +=item binmode() can be used to set :crlf and :raw 
modes -=item Binary Compatibility +=item C<-T> filetest recognizes UTF-8 encoded files as "text" -=item Security fixes may affect compatibility +=item system(), backticks and pipe open now reflect exec() failure -=item Relaxed new mandatory warnings introduced in 5.004 +=item Improved diagnostics -=item Licensing +=item Diagnostics follow STDERR + +More consistent close-on-exec behavior + +=item syswrite() ease-of-use + +=item Better syntax checks on parenthesized unary operators + +=item Bit operators support full native integer width + +=item Improved security features + +More functional bareword prototype (*) + +=item C<require> and C<do> may be overridden + +=item $^X variables may now have names longer than one character + +=item New variable $^C reflects C<-c> switch + +=item New variable $^V contains Perl version as a string + +=item Optional Y2K warnings =back -=item Core Changes +=item Modules and Pragmata =over -=item Threads +=item Modules -=item Compiler +attributes, B, Benchmark, ByteLoader, constant, charnames, Data::Dumper, +DB, DB_File, Devel::DProf, Devel::Peek, Dumpvalue, DynaLoader, English, +Env, Fcntl, File::Compare, File::Find, File::Glob, File::Spec, +File::Spec::Functions, Getopt::Long, IO, JPL, lib, Math::BigInt, +Math::Complex, Math::Trig, Pod::Parser, Pod::InputObjects, Pod::Checker, +podchecker, Pod::ParseUtils, Pod::Find, Pod::Select, podselect, Pod::Usage, +pod2usage, Pod::Text and Pod::Man, SDBM_File, Sys::Syslog, Sys::Hostname, +Term::ANSIColor, Time::Local, Win32, XSLoader, DBM Filters -=item Regular Expressions +=item Pragmata -Many new and improved optimizations, Many bug fixes, New regular expression -constructs, New operator for precompiled regular expressions, Other -improvements, Incompatible changes +=back -=item Improved malloc() +=item Utility Changes -=item Quicksort is internally implemented +=over -=item Reliable signals +=item dprofpp -=item Reliable stack pointers +=item find2perl -=item More generous treatment of 
carriage returns +=item h2xs -=item Memory leaks +=item perlcc -=item Better support for multiple interpreters +=item perldoc -=item Behavior of local() on array and hash elements is now well-defined +=item The Perl Debugger -=item C<%!> is transparently tied to the L<Errno> module +=back -=item Pseudo-hashes are supported +=item Improved Documentation -=item C<EXPR foreach EXPR> is supported +perlapi.pod, perlboot.pod, perlcompile.pod, perldbmfilter.pod, +perldebug.pod, perldebguts.pod, perlfork.pod, perlfilter.pod, perlhack.pod, +perlintern.pod, perllexwarn.pod, perlnumber.pod, perlopentut.pod, +perlreftut.pod, perltootc.pod, perltodo.pod, perlunicode.pod -=item Keywords can be globally overridden +=item Performance enhancements -=item C<$^E> is meaningful on Win32 +=over -=item C<foreach (1..1000000)> optimized +=item Simple sort() using { $a <=> $b } and the like are optimized -=item C<Foo::> can be used as implicitly quoted package name +=item Optimized assignments to lexical variables -=item C<exists $Foo::{Bar::}> tests existence of a package +=item Faster subroutine calls -=item Better locale support +delete(), each(), values() and hash iteration are faster -=item Experimental support for 64-bit platforms +=back -=item prototype() returns useful results on builtins +=item Installation and Configuration Improvements -=item Extended support for exception handling +=over -=item Re-blessing in DESTROY() supported for chaining DESTROY() methods +=item -Dusethreads means something different -=item All C<printf> format conversions are handled internally +=item New Configure flags -=item New C<INIT> keyword +=item Threadedness and 64-bitness now more daring -=item New C<lock> keyword +=item Long Doubles -=item New C<qr//> operator +=item -Dusemorebits -=item C<our> is now a reserved word +=item -Duselargefiles -=item Tied arrays are now fully supported +=item installusrbinperl -=item Tied handles support is better +=item SOCKS support -=item 4th argument to substr 
+=item C<-A> flag -=item Negative LENGTH argument to splice +=item Enhanced Installation Directories -=item Magic lvalues are now more magical +=back -=item E<lt>E<gt> now reads in records +=item Platform specific changes + +=over + +=item Supported platforms + +=item DOS + +=item OS390 (OpenEdition MVS) + +=item VMS + +=item Win32 =back -=item Supported Platforms +=item Significant bug fixes =over -=item New Platforms +=item <HANDLE> on empty files + +=item C<eval '...'> improvements + +=item All compilation errors are true errors + +=item Implicitly closed filehandles are safer + +=item Behavior of list slices is more consistent + +=item C<(\$)> prototype and C<$foo{a}> -=item Changes in existing support +=item C<goto &sub> and AUTOLOAD + +=item C<-bareword> allowed under C<use integer> + +=item Failures in DESTROY() + +=item Locale bugs fixed + +=item Memory leaks + +=item Spurious subroutine stubs after failed subroutine calls + +=item Taint failures under C<-U> + +=item END blocks and the C<-c> switch + +=item Potential to leak DATA filehandles =back -=item Modules and Pragmata +=item New or Changed Diagnostics + +"%s" variable %s masks earlier declaration in same %s, "my sub" not yet +implemented, "our" variable %s redeclared, '!' 
allowed only after types %s, +/ cannot take a count, / must be followed by a, A or Z, / must be followed +by a*, A* or Z*, / must follow a numeric type, /%s/: Unrecognized escape +\\%c passed through, /%s/: Unrecognized escape \\%c in character class +passed through, /%s/ should probably be written as "%s", %s() called too +early to check prototype, %s argument is not a HASH or ARRAY element, %s +argument is not a HASH or ARRAY element or slice, %s argument is not a +subroutine name, %s package attribute may clash with future reserved word: +%s, (in cleanup) %s, <> should be quotes, Attempt to join self, Bad evalled +substitution pattern, Bad realloc() ignored, Bareword found in conditional, +Binary number > 0b11111111111111111111111111111111 non-portable, Bit vector +size > 32 non-portable, Buffer overflow in prime_env_iter: %s, Can't check +filesystem of script "%s", Can't declare class for non-scalar %s in "%s", +Can't declare %s in "%s", Can't ignore signal CHLD, forcing to default, +Can't modify non-lvalue subroutine call, Can't read CRTL environ, Can't +remove %s: %s, skipping file, Can't return %s from lvalue subroutine, Can't +weaken a nonreference, Character class [:%s:] unknown, Character class +syntax [%s] belongs inside character classes, Constant is not %s reference, +constant(%s): %s, CORE::%s is not a keyword, defined(@array) is deprecated, +defined(%hash) is deprecated, Did not produce a valid header, (Did you mean +"local" instead of "our"?), Document contains no data, entering effective +%s failed, false [] range "%s" in regexp, Filehandle %s opened only for +output, flock() on closed filehandle %s, Global symbol "%s" requires +explicit package name, Hexadecimal number > 0xffffffff non-portable, +Ill-formed CRTL environ value "%s", Ill-formed message in prime_env_iter: +|%s|, Illegal binary digit %s, Illegal binary digit %s ignored, Illegal +number of bits in vec, Integer overflow in %s number, Invalid %s attribute: +%s, Invalid %s attributes: %s, 
invalid [] range "%s" in regexp, Invalid +separator character %s in attribute list, Invalid separator character %s in +subroutine attribute list, leaving effective %s failed, Lvalue subs +returning %s not implemented yet, Method %s not permitted, Missing +%sbrace%s on \N{}, Missing command in piped open, Missing name in "my sub", +No %s specified for -%c, No package name allowed for variable %s in "our", +No space allowed after -%c, no UTC offset information; assuming local time +is UTC, Octal number > 037777777777 non-portable, panic: del_backref, +panic: kid popen errno read, panic: magic_killbackrefs, Parentheses missing +around "%s" list, Possible Y2K bug: %s, pragma "attrs" is deprecated, use +"sub NAME : ATTRS" instead, Premature end of script headers, Repeat count +in pack overflows, Repeat count in unpack overflows, realloc() of freed +memory ignored, Reference is already weak, setpgrp can't take arguments, +Strange *+?{} on zero-length expression, switching effective %s is not +implemented, This Perl can't reset CRTL environ elements (%s), This Perl +can't set CRTL environ elements (%s=%s), Too late to run %s block, Unknown +open() mode '%s', Unknown process %x sent message to prime_env_iter: %s, +Unrecognized escape \\%c passed through, Unterminated attribute parameter +in attribute list, Unterminated attribute list, Unterminated attribute +parameter in subroutine attribute list, Unterminated subroutine attribute +list, Value of CLI symbol "%s" too long, Version number must be a constant +number + +=item New tests + +=item Incompatible Changes =over -=item New Modules +=item Perl Source Incompatibilities -B, Data::Dumper, Errno, File::Spec, ExtUtils::Installed, -ExtUtils::Packlist, Fatal, IPC::SysV, Test, Tie::Array, Tie::Handle, -Thread, attrs, fields, re +CHECK is a new keyword, Treatment of list slices of undef has changed -=item Changes in existing modules +=item Format of $English::PERL_VERSION is different -CGI, POSIX, DB_File, MakeMaker, CPAN, Cwd, 
Benchmark +Literals of the form C<1.2.3> parse differently, Possibly changed +pseudo-random number generator, Hashing function for hash keys has changed, +C<undef> fails on read only values, Close-on-exec bit may be set on pipe +and socket handles, Writing C<"$$1"> to mean C<"${$}1"> is unsupported, +delete(), values() and C<\(%h)> operate on aliases to values, not copies, +vec(EXPR,OFFSET,BITS) enforces powers-of-two BITS, Text of some diagnostic +output has changed, C<%@> has been removed, Parenthesized not() behaves +like a list operator, Semantics of bareword prototype C<(*)> have changed + +=item Semantics of bit operators may have changed on 64-bit platforms + +=item More builtins taint their results + +=item C Source Incompatibilities + +C<PERL_POLLUTE>, C<PERL_IMPLICIT_CONTEXT>, C<PERL_POLLUTE_MALLOC> + +=item Compatible C Source API Changes + +C<PATCHLEVEL> is now C<PERL_VERSION> + +=item Binary Incompatibilities =back -=item Utility Changes +=item Known Problems + +=over + +=item Thread test failures + +=item EBCDIC platforms not supported + +=item In 64-bit HP-UX the lib/io_multihomed test may hang -=item Documentation Changes - -=item New Diagnostics - -Ambiguous call resolved as CORE::%s(), qualify as such or use &, Bad index -while coercing array into hash, Bareword "%s" refers to nonexistent -package, Can't call method "%s" on an undefined value, Can't coerce array -into hash, Can't goto subroutine from an eval-string, Can't localize -pseudo-hash element, Can't use %%! because Errno.pm is not available, -Cannot find an opnumber for "%s", Character class syntax [. .] 
is reserved -for future extensions, Character class syntax [: :] is reserved for future -extensions, Character class syntax [= =] is reserved for future extensions, -%s: Eval-group in insecure regular expression, %s: Eval-group not allowed, -use re 'eval', %s: Eval-group not allowed at run time, Explicit blessing to -'' (assuming package main), Illegal hex digit ignored, No such array field, -No such field "%s" in variable %s of type %s, Out of memory during -ridiculously large request, Range iterator outside integer range, Recursive -inheritance detected while looking for method '%s' in package '%s', -Reference found where even-sized list expected, Undefined value assigned to -typeglob, Use of reserved word "%s" is deprecated, perl: warning: Setting -locale failed +=item NEXTSTEP 3.3 POSIX test failure + +=item Tru64 (aka Digital UNIX, aka DEC OSF/1) lib/sdbm test failure with +gcc + +=item UNICOS/mk CC failures during Configure run + +=item Arrow operator and arrays + +=item Windows 2000 + +=item Experimental features + +Threads, Unicode, 64-bit support, Lvalue subroutines, Weak references, The +pseudo-hash data type, The Compiler suite, Internal implementation of file +globbing, The DB module, The regular expression constructs C<(?{ code })> +and C<(??{ code })> + +=back =item Obsolete Diagnostics -Can't mktemp(), Can't write to temp file for B<-e>: %s, Cannot open -temporary file +Character class syntax [: :] is reserved for future extensions, Ill-formed +logical name |%s| in prime_env_iter, Probable precedence problem on %s, +regexp too big, Use of "$$<digit>" to mean "${$}<digit>" is deprecated -=item BUGS +=item Reporting Bugs =item SEE ALSO =item HISTORY +=back + =head2 perldata - Perl data types +=over + =item DESCRIPTION =over @@ -1033,12 +1537,20 @@ temporary file =item List value constructors +=item Slices + =item Typeglobs and Filehandles =back +=item SEE ALSO + +=back + =head2 perlsyn - Perl syntax +=over + =item DESCRIPTION =over @@ -1065,8 +1577,12 
@@ temporary file =back +=back + =head2 perlop - Perl operators and precedence +=over + =item SYNOPSIS =item DESCRIPTION @@ -1131,14 +1647,14 @@ unary &, unary *, (TYPE) ?PATTERN?, m/PATTERN/cgimosx, /PATTERN/cgimosx, q/STRING/, C<'STRING'>, qq/STRING/, "STRING", qr/STRING/imosx, qx/STRING/, `STRING`, qw/STRING/, -s/PATTERN/REPLACEMENT/egimosx, tr/SEARCHLIST/REPLACEMENTLIST/cds, -y/SEARCHLIST/REPLACEMENTLIST/cds +s/PATTERN/REPLACEMENT/egimosx, tr/SEARCHLIST/REPLACEMENTLIST/cdsUC, +y/SEARCHLIST/REPLACEMENTLIST/cdsUC =item Gory details of parsing quoted constructs Finding the end, Removal of backslashes before delimiters, Interpolation, C<<<'EOF'>, C<m''>, C<s'''>, C<tr///>, C<y///>, C<''>, C<q//>, C<"">, -C<``>, C<qq//>, C<qx//>, C<<file*globE<gt>>, C<?RE?>, C</RE/>, C<m/RE/>, +C<``>, C<qq//>, C<qx//>, C<< <file*glob> >>, C<?RE?>, C</RE/>, C<m/RE/>, C<s/RE/foo/>,, Interpolation of regular expressions, Optimization of regular expressions @@ -1156,8 +1672,12 @@ regular expressions =back +=back + =head2 perlre - Perl regular expressions +=over + =item DESCRIPTION i, m, s, x @@ -1166,27 +1686,44 @@ i, m, s, x =item Regular Expressions -C<(?#text)>, C<(?:pattern)>, C<(?imsx-imsx:pattern)>, C<(?=pattern)>, -C<(?!pattern)>, C<(?E<lt>=pattern)>, C<(?<!pattern)>, C<(?{ code })>, -C<(?E<gt>pattern)>, C<(?(condition)yes-pattern|no-pattern)>, -C<(?(condition)yes-pattern)>, C<(?imsx-imsx)> +cntrl, graph, print, punct, xdigit + +=item Extended Patterns + +C<(?#text)>, C<(?imsx-imsx)>, C<(?:pattern)>, C<(?imsx-imsx:pattern)>, +C<(?=pattern)>, C<(?!pattern)>, C<(?<=pattern)>, C<(?<!pattern)>, C<(?{ +code })>, C<(??{ code })>, C<< (?>pattern) >>, +C<(?(condition)yes-pattern|no-pattern)>, C<(?(condition)yes-pattern)> =item Backtracking =item Version 8 Regular Expressions -=item WARNING on \1 vs $1 +=item Warning on \1 vs $1 =item Repeated patterns matching zero-length substring +=item Combining pieces together + +C<ST>, C<S|T>, C<S{REPEAT_COUNT}>, C<S{min,max}>, C<S{min,max}?>, 
C<S?>, +C<S*>, C<S+>, C<S??>, C<S*?>, C<S+?>, C<< (?>S) >>, C<(?=S)>, C<(?<=S)>, +C<(?!S)>, C<(?<!S)>, C<(??{ EXPR })>, +C<(?(condition)yes-pattern|no-pattern)> + =item Creating custom RE engines +=back + +=item BUGS + =item SEE ALSO =back =head2 perlrun - how to execute the Perl interpreter +=over + =item SYNOPSIS =item DESCRIPTION @@ -1195,28 +1732,33 @@ C<(?(condition)yes-pattern)>, C<(?imsx-imsx)> =item #! and quoting on non-Unix systems -OS/2, MS-DOS, Win95/NT, Macintosh +OS/2, MS-DOS, Win95/NT, Macintosh, VMS =item Location of Perl -=item Switches +=item Command Switches -B<-0>[I<digits>], B<-a>, B<-c>, B<-d>, B<-d:>I<foo>, B<-D>I<letters>, -B<-D>I<number>, B<-e> I<commandline>, B<-F>I<pattern>, B<-h>, -B<-i>[I<extension>], B<-I>I<directory>, B<-l>[I<octnum>], +B<-0>[I<digits>], B<-a>, B<-C>, B<-c>, B<-d>, B<-d:>I<foo>, +B<-D>I<letters>, B<-D>I<number>, B<-e> I<commandline>, B<-F>I<pattern>, +B<-h>, B<-i>[I<extension>], B<-I>I<directory>, B<-l>[I<octnum>], B<-m>[B<->]I<module>, B<-M>[B<->]I<module>, B<-M>[B<->]I<'module ...'>, B<-[mM]>[B<->]I<module=arg[,arg]...>, B<-n>, B<-p>, B<-P>, B<-s>, B<-S>, -B<-T>, B<-u>, B<-U>, B<-v>, B<-V>, B<-V:>I<name>, B<-w>, B<-x> I<directory> +B<-T>, B<-u>, B<-U>, B<-v>, B<-V>, B<-V:>I<name>, B<-w>, B<-W>, B<-X>, +B<-x> I<directory> =back =item ENVIRONMENT HOME, LOGDIR, PATH, PERL5LIB, PERL5OPT, PERLLIB, PERL5DB, PERL5SHELL -(specific to WIN32 port), PERL_DEBUG_MSTATS, PERL_DESTRUCT_LEVEL +(specific to the Win32 port), PERL_DEBUG_MSTATS, PERL_DESTRUCT_LEVEL + +=back =head2 perlfunc - Perl builtin functions +=over + =item DESCRIPTION =over @@ -1235,49 +1777,53 @@ communication functions, Fetching user and group info, Fetching network info, Time-related functions, Functions new in perl5, Functions obsoleted in perl5 +=item Portability + =item Alphabetical Listing of Perl Functions I<-X> FILEHANDLE, I<-X> EXPR, I<-X>, abs VALUE, abs, accept NEWSOCKET,GENERICSOCKET, alarm SECONDS, alarm, atan2 Y,X, bind SOCKET,NAME, -binmode 
FILEHANDLE, bless REF,CLASSNAME, bless REF, caller EXPR, caller, -chdir EXPR, chmod LIST, chomp VARIABLE, chomp LIST, chomp, chop VARIABLE, -chop LIST, chop, chown LIST, chr NUMBER, chr, chroot FILENAME, chroot, -close FILEHANDLE, close, closedir DIRHANDLE, connect SOCKET,NAME, continue -BLOCK, cos EXPR, crypt PLAINTEXT,SALT, dbmclose HASH, dbmopen -HASH,DBNAME,MODE, defined EXPR, defined, delete EXPR, die LIST, do BLOCK, -do SUBROUTINE(LIST), do EXPR, dump LABEL, each HASH, eof FILEHANDLE, eof -(), eof, eval EXPR, eval BLOCK, exec LIST, exec PROGRAM LIST, exists EXPR, -exit EXPR, exp EXPR, exp, fcntl FILEHANDLE,FUNCTION,SCALAR, fileno -FILEHANDLE, flock FILEHANDLE,OPERATION, fork, format, formline -PICTURE,LIST, getc FILEHANDLE, getc, getlogin, getpeername SOCKET, getpgrp -PID, getppid, getpriority WHICH,WHO, getpwnam NAME, getgrnam NAME, -gethostbyname NAME, getnetbyname NAME, getprotobyname NAME, getpwuid UID, -getgrgid GID, getservbyname NAME,PROTO, gethostbyaddr ADDR,ADDRTYPE, -getnetbyaddr ADDR,ADDRTYPE, getprotobynumber NUMBER, getservbyport -PORT,PROTO, getpwent, getgrent, gethostent, getnetent, getprotoent, -getservent, setpwent, setgrent, sethostent STAYOPEN, setnetent STAYOPEN, -setprotoent STAYOPEN, setservent STAYOPEN, endpwent, endgrent, endhostent, -endnetent, endprotoent, endservent, getsockname SOCKET, getsockopt -SOCKET,LEVEL,OPTNAME, glob EXPR, glob, gmtime EXPR, goto LABEL, goto EXPR, -goto &NAME, grep BLOCK LIST, grep EXPR,LIST, hex EXPR, hex, import, index -STR,SUBSTR,POSITION, index STR,SUBSTR, int EXPR, int, ioctl -FILEHANDLE,FUNCTION,SCALAR, join EXPR,LIST, keys HASH, kill LIST, last -LABEL, last, lc EXPR, lc, lcfirst EXPR, lcfirst, length EXPR, length, link -OLDFILE,NEWFILE, listen SOCKET,QUEUESIZE, local EXPR, localtime EXPR, log -EXPR, log, lstat FILEHANDLE, lstat EXPR, lstat, m//, map BLOCK LIST, map -EXPR,LIST, mkdir FILENAME,MODE, msgctl ID,CMD,ARG, msgget KEY,FLAGS, msgsnd -ID,MSG,FLAGS, msgrcv ID,VAR,SIZE,TYPE,FLAGS, my EXPR, next 
LABEL, next, no -Module LIST, oct EXPR, oct, open FILEHANDLE,EXPR, open FILEHANDLE, opendir -DIRHANDLE,EXPR, ord EXPR, ord, pack TEMPLATE,LIST, package, package -NAMESPACE, pipe READHANDLE,WRITEHANDLE, pop ARRAY, pop, pos SCALAR, pos, -print FILEHANDLE LIST, print LIST, print, printf FILEHANDLE FORMAT, LIST, -printf FORMAT, LIST, prototype FUNCTION, push ARRAY,LIST, q/STRING/, -qq/STRING/, qr/STRING/, qx/STRING/, qw/STRING/, quotemeta EXPR, quotemeta, -rand EXPR, rand, read FILEHANDLE,SCALAR,LENGTH,OFFSET, read -FILEHANDLE,SCALAR,LENGTH, readdir DIRHANDLE, readline EXPR, readlink EXPR, -readlink, readpipe EXPR, recv SOCKET,SCALAR,LEN,FLAGS, redo LABEL, redo, -ref EXPR, ref, rename OLDNAME,NEWNAME, require EXPR, require, reset EXPR, -reset, return EXPR, return, reverse LIST, rewinddir DIRHANDLE, rindex +binmode FILEHANDLE, DISCIPLINE, binmode FILEHANDLE, bless REF,CLASSNAME, +bless REF, caller EXPR, caller, chdir EXPR, chmod LIST, chomp VARIABLE, +chomp LIST, chomp, chop VARIABLE, chop LIST, chop, chown LIST, chr NUMBER, +chr, chroot FILENAME, chroot, close FILEHANDLE, close, closedir DIRHANDLE, +connect SOCKET,NAME, continue BLOCK, cos EXPR, crypt PLAINTEXT,SALT, +dbmclose HASH, dbmopen HASH,DBNAME,MASK, defined EXPR, defined, delete +EXPR, die LIST, do BLOCK, do SUBROUTINE(LIST), do EXPR, dump LABEL, dump, +each HASH, eof FILEHANDLE, eof (), eof, eval EXPR, eval BLOCK, exec LIST, +exec PROGRAM LIST, exists EXPR, exit EXPR, exp EXPR, exp, fcntl +FILEHANDLE,FUNCTION,SCALAR, fileno FILEHANDLE, flock FILEHANDLE,OPERATION, +fork, format, formline PICTURE,LIST, getc FILEHANDLE, getc, getlogin, +getpeername SOCKET, getpgrp PID, getppid, getpriority WHICH,WHO, getpwnam +NAME, getgrnam NAME, gethostbyname NAME, getnetbyname NAME, getprotobyname +NAME, getpwuid UID, getgrgid GID, getservbyname NAME,PROTO, gethostbyaddr +ADDR,ADDRTYPE, getnetbyaddr ADDR,ADDRTYPE, getprotobynumber NUMBER, +getservbyport PORT,PROTO, getpwent, getgrent, gethostent, getnetent, +getprotoent, 
getservent, setpwent, setgrent, sethostent STAYOPEN, setnetent +STAYOPEN, setprotoent STAYOPEN, setservent STAYOPEN, endpwent, endgrent, +endhostent, endnetent, endprotoent, endservent, getsockname SOCKET, +getsockopt SOCKET,LEVEL,OPTNAME, glob EXPR, glob, gmtime EXPR, goto LABEL, +goto EXPR, goto &NAME, grep BLOCK LIST, grep EXPR,LIST, hex EXPR, hex, +import, index STR,SUBSTR,POSITION, index STR,SUBSTR, int EXPR, int, ioctl +FILEHANDLE,FUNCTION,SCALAR, join EXPR,LIST, keys HASH, kill SIGNAL, LIST, +last LABEL, last, lc EXPR, lc, lcfirst EXPR, lcfirst, length EXPR, length, +link OLDFILE,NEWFILE, listen SOCKET,QUEUESIZE, local EXPR, localtime EXPR, +lock, log EXPR, log, lstat FILEHANDLE, lstat EXPR, lstat, m//, map BLOCK +LIST, map EXPR,LIST, mkdir FILENAME,MASK, mkdir FILENAME, msgctl +ID,CMD,ARG, msgget KEY,FLAGS, msgrcv ID,VAR,SIZE,TYPE,FLAGS, msgsnd +ID,MSG,FLAGS, my EXPR, my EXPR : ATTRIBUTES, next LABEL, next, no Module +LIST, oct EXPR, oct, open FILEHANDLE,MODE,LIST, open FILEHANDLE,EXPR, open +FILEHANDLE, opendir DIRHANDLE,EXPR, ord EXPR, ord, our EXPR, pack +TEMPLATE,LIST, package, package NAMESPACE, pipe READHANDLE,WRITEHANDLE, pop +ARRAY, pop, pos SCALAR, pos, print FILEHANDLE LIST, print LIST, print, +printf FILEHANDLE FORMAT, LIST, printf FORMAT, LIST, prototype FUNCTION, +push ARRAY,LIST, q/STRING/, qq/STRING/, qr/STRING/, qx/STRING/, qw/STRING/, +quotemeta EXPR, quotemeta, rand EXPR, rand, read +FILEHANDLE,SCALAR,LENGTH,OFFSET, read FILEHANDLE,SCALAR,LENGTH, readdir +DIRHANDLE, readline EXPR, readlink EXPR, readlink, readpipe EXPR, recv +SOCKET,SCALAR,LENGTH,FLAGS, redo LABEL, redo, ref EXPR, ref, rename +OLDNAME,NEWNAME, require VERSION, require EXPR, require, reset EXPR, reset, +return EXPR, return, reverse LIST, rewinddir DIRHANDLE, rindex STR,SUBSTR,POSITION, rindex STR,SUBSTR, rmdir FILENAME, rmdir, s///, scalar EXPR, seek FILEHANDLE,POSITION,WHENCE, seekdir DIRHANDLE,POS, select FILEHANDLE, select, select RBITS,WBITS,EBITS,TIMEOUT, semctl @@ 
-1289,81 +1835,103 @@ shmwrite ID,STRING,POS,SIZE, shutdown SOCKET,HOW, sin EXPR, sin, sleep EXPR, sleep, socket SOCKET,DOMAIN,TYPE,PROTOCOL, socketpair SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL, sort SUBNAME LIST, sort BLOCK LIST, sort LIST, splice ARRAY,OFFSET,LENGTH,LIST, splice ARRAY,OFFSET,LENGTH, -splice ARRAY,OFFSET, split /PATTERN/,EXPR,LIMIT, split /PATTERN/,EXPR, -split /PATTERN/, split, sprintf FORMAT, LIST, sqrt EXPR, sqrt, srand EXPR, -srand, stat FILEHANDLE, stat EXPR, stat, study SCALAR, study, sub BLOCK, -sub NAME, sub NAME BLOCK, substr EXPR,OFFSET,LEN,REPLACEMENT, substr -EXPR,OFFSET,LEN, substr EXPR,OFFSET, symlink OLDFILE,NEWFILE, syscall LIST, -sysopen FILEHANDLE,FILENAME,MODE, sysopen FILEHANDLE,FILENAME,MODE,PERMS, -sysread FILEHANDLE,SCALAR,LENGTH,OFFSET, sysread FILEHANDLE,SCALAR,LENGTH, -sysseek FILEHANDLE,POSITION,WHENCE, system LIST, system PROGRAM LIST, -syswrite FILEHANDLE,SCALAR,LENGTH,OFFSET, syswrite -FILEHANDLE,SCALAR,LENGTH, tell FILEHANDLE, tell, telldir DIRHANDLE, tie +splice ARRAY,OFFSET, splice ARRAY, split /PATTERN/,EXPR,LIMIT, split +/PATTERN/,EXPR, split /PATTERN/, split, sprintf FORMAT, LIST, sqrt EXPR, +sqrt, srand EXPR, srand, stat FILEHANDLE, stat EXPR, stat, study SCALAR, +study, sub BLOCK, sub NAME, sub NAME BLOCK, substr +EXPR,OFFSET,LENGTH,REPLACEMENT, substr EXPR,OFFSET,LENGTH, substr +EXPR,OFFSET, symlink OLDFILE,NEWFILE, syscall LIST, sysopen +FILEHANDLE,FILENAME,MODE, sysopen FILEHANDLE,FILENAME,MODE,PERMS, sysread +FILEHANDLE,SCALAR,LENGTH,OFFSET, sysread FILEHANDLE,SCALAR,LENGTH, sysseek +FILEHANDLE,POSITION,WHENCE, system LIST, system PROGRAM LIST, syswrite +FILEHANDLE,SCALAR,LENGTH,OFFSET, syswrite FILEHANDLE,SCALAR,LENGTH, +syswrite FILEHANDLE,SCALAR, tell FILEHANDLE, tell, telldir DIRHANDLE, tie VARIABLE,CLASSNAME,LIST, tied VARIABLE, time, times, tr///, truncate FILEHANDLE,LENGTH, truncate EXPR,LENGTH, uc EXPR, uc, ucfirst EXPR, ucfirst, umask EXPR, umask, undef EXPR, undef, unlink LIST, unlink, unpack 
-TEMPLATE,EXPR, untie VARIABLE, unshift ARRAY,LIST, use Module LIST, use -Module, use Module VERSION LIST, use VERSION, utime LIST, values HASH, vec -EXPR,OFFSET,BITS, wait, waitpid PID,FLAGS, wantarray, warn LIST, write -FILEHANDLE, write EXPR, write, y/// +TEMPLATE,EXPR, untie VARIABLE, unshift ARRAY,LIST, use Module VERSION LIST, +use Module VERSION, use Module LIST, use Module, use VERSION, utime LIST, +values HASH, vec EXPR,OFFSET,BITS, wait, waitpid PID,FLAGS, wantarray, warn +LIST, write FILEHANDLE, write EXPR, write, y/// + +=back =back =head2 perlvar - Perl predefined variables +=over + =item DESCRIPTION =over =item Predefined Names -$ARG, $_, $E<lt>I<digits>E<gt>, $MATCH, $&, $PREMATCH, $`, $POSTMATCH, $', -$LAST_PAREN_MATCH, $+, $MULTILINE_MATCHING, $*, input_line_number HANDLE -EXPR, $INPUT_LINE_NUMBER, $NR, $, input_record_separator HANDLE EXPR, -$INPUT_RECORD_SEPARATOR, $RS, $/, autoflush HANDLE EXPR, $OUTPUT_AUTOFLUSH, -$|, output_field_separator HANDLE EXPR, $OUTPUT_FIELD_SEPARATOR, $OFS, $,, -output_record_separator HANDLE EXPR, $OUTPUT_RECORD_SEPARATOR, $ORS, $\, -$LIST_SEPARATOR, $", $SUBSCRIPT_SEPARATOR, $SUBSEP, $;, $OFMT, $#, -format_page_number HANDLE EXPR, $FORMAT_PAGE_NUMBER, $%, -format_lines_per_page HANDLE EXPR, $FORMAT_LINES_PER_PAGE, $=, -format_lines_left HANDLE EXPR, $FORMAT_LINES_LEFT, $-, format_name HANDLE -EXPR, $FORMAT_NAME, $~, format_top_name HANDLE EXPR, $FORMAT_TOP_NAME, $^, -format_line_break_characters HANDLE EXPR, $FORMAT_LINE_BREAK_CHARACTERS, -$:, format_formfeed HANDLE EXPR, $FORMAT_FORMFEED, $^L, $ACCUMULATOR, $^A, -$CHILD_ERROR, $?, $OS_ERROR, $ERRNO, $!, $EXTENDED_OS_ERROR, $^E, -$EVAL_ERROR, $@, $PROCESS_ID, $PID, $$, $REAL_USER_ID, $UID, $<, -$EFFECTIVE_USER_ID, $EUID, $>, $REAL_GROUP_ID, $GID, $(, -$EFFECTIVE_GROUP_ID, $EGID, $), $PROGRAM_NAME, $0, $[, $PERL_VERSION, $], -$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, $INPLACE_EDIT, $^I, $^M, -$OSNAME, $^O, $PERLDB, $^P, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, $^R, $^S, 
-$BASETIME, $^T, $WARNING, $^W, $EXECUTABLE_NAME, $^X, $ARGV, @ARGV, @INC, -@_, %INC, %ENV $ENV{expr}, %SIG $SIG{expr} +$ARG, $_, $<I<digits>>, $MATCH, $&, $PREMATCH, $`, $POSTMATCH, $', +$LAST_PAREN_MATCH, $+, @+, $MULTILINE_MATCHING, $*, input_line_number +HANDLE EXPR, $INPUT_LINE_NUMBER, $NR, $, input_record_separator HANDLE +EXPR, $INPUT_RECORD_SEPARATOR, $RS, $/, autoflush HANDLE EXPR, +$OUTPUT_AUTOFLUSH, $|, output_field_separator HANDLE EXPR, +$OUTPUT_FIELD_SEPARATOR, $OFS, $,, output_record_separator HANDLE EXPR, +$OUTPUT_RECORD_SEPARATOR, $ORS, $\, $LIST_SEPARATOR, $", +$SUBSCRIPT_SEPARATOR, $SUBSEP, $;, $OFMT, $#, format_page_number HANDLE +EXPR, $FORMAT_PAGE_NUMBER, $%, format_lines_per_page HANDLE EXPR, +$FORMAT_LINES_PER_PAGE, $=, format_lines_left HANDLE EXPR, +$FORMAT_LINES_LEFT, $-, @-, C<$`> is the same as C<substr($var, 0, $-[0]>), +C<$&> is the same as C<substr($var, $-[0], $+[0] - $-[0]>), C<$'> is the +same as C<substr($var, $+[0]>), C<$1> is the same as C<substr($var, $-[1], +$+[1] - $-[1])>, C<$2> is the same as C<substr($var, $-[2], $+[2] - +$-[2])>, C<$3> is the same as C<substr $var, $-[3], $+[3] - $-[3]>), +format_name HANDLE EXPR, $FORMAT_NAME, $~, format_top_name HANDLE EXPR, +$FORMAT_TOP_NAME, $^, format_line_break_characters HANDLE EXPR, +$FORMAT_LINE_BREAK_CHARACTERS, $:, format_formfeed HANDLE EXPR, +$FORMAT_FORMFEED, $^L, $ACCUMULATOR, $^A, $CHILD_ERROR, $?, $OS_ERROR, +$ERRNO, $!, $EXTENDED_OS_ERROR, $^E, $EVAL_ERROR, $@, $PROCESS_ID, $PID, +$$, $REAL_USER_ID, $UID, $<, $EFFECTIVE_USER_ID, $EUID, $>, $REAL_GROUP_ID, +$GID, $(, $EFFECTIVE_GROUP_ID, $EGID, $), $PROGRAM_NAME, $0, $[, $], +$COMPILING, $^C, $DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, %^H, +$INPLACE_EDIT, $^I, $^M, $OSNAME, $^O, $PERLDB, $^P, 0x01, 0x02, 0x04, +0x08, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, $LAST_REGEXP_CODE_RESULT, $^R, +$EXCEPTIONS_BEING_CAUGHT, $^S, $BASETIME, $^T, $PERL_VERSION, $^V, +$WARNING, $^W, ${^WARNING_BITS}, ${^WIDE_SYSTEM_CALLS}, 
$EXECUTABLE_NAME, +$^X, $ARGV, @ARGV, @INC, @_, %INC, %ENV, $ENV{expr}, %SIG, $SIG{expr} =item Error Indicators +=item Technical Note on the Syntax of Variable Names + +=back + +=item BUGS + =back =head2 perlsub - Perl subroutines +=over + =item SYNOPSIS =item DESCRIPTION =over -=item Private Variables via C<my()> +=item Private Variables via my() =item Persistent Private Variables =item Temporary Values via local() +=item Lvalue subroutines + =item Passing Symbol Table Entries (typeglobs) =item When to Still Use local() -1. You need to give a global variable a temporary value, especially C<$_>, -2. You need to create a local file or directory handle or a local function, -3. You want to temporarily change just one element of an array or hash +1. You need to give a global variable a temporary value, especially $_, 2. +You need to create a local file or directory handle or a local function, 3. +You want to temporarily change just one element of an array or hash =item Pass by Reference @@ -1371,16 +1939,22 @@ $BASETIME, $^T, $WARNING, $^W, $EXECUTABLE_NAME, $^X, $ARGV, @ARGV, @INC, =item Constant Functions -=item Overriding Builtin Functions +=item Overriding Built-in Functions =item Autoloading +=item Subroutine Attributes + =back =item SEE ALSO +=back + =head2 perlmod - Perl modules (packages and symbol tables) +=over + =item DESCRIPTION =over @@ -1399,8 +1973,12 @@ $BASETIME, $^T, $WARNING, $^W, $EXECUTABLE_NAME, $^X, $ARGV, @ARGV, @INC, =item SEE ALSO +=back + =head2 perlmodlib - constructing new Perl modules and finding existing ones +=over + =item DESCRIPTION =item THE PERL MODULE LIBRARY @@ -1409,27 +1987,40 @@ $BASETIME, $^T, $WARNING, $^W, $EXECUTABLE_NAME, $^X, $ARGV, @ARGV, @INC, =item Pragmatic Modules -use autouse MODULE => qw(sub1 sub2 sub3), blib, diagnostics, integer, less, -lib, locale, ops, overload, re, sigtrap, strict, subs, vmsish, vars +attributes, attrs, autouse, base, blib, caller, charnames, constant, +diagnostics, fields, filetest, integer, 
less, lib, locale, ops, overload, +re, sigtrap, strict, subs, utf8, vars, warnings =item Standard Modules -AnyDBM_File, AutoLoader, AutoSplit, Benchmark, CPAN, CPAN::FirstTime, -CPAN::Nox, Carp, Class::Struct, Config, Cwd, DB_File, Devel::SelfStubber, -DirHandle, DynaLoader, English, Env, Exporter, ExtUtils::Embed, -ExtUtils::Install, ExtUtils::Liblist, ExtUtils::MM_OS2, ExtUtils::MM_Unix, -ExtUtils::MM_VMS, ExtUtils::MakeMaker, ExtUtils::Manifest, -ExtUtils::Mkbootstrap, ExtUtils::Mksymlists, ExtUtils::testlib, Fatal, -Fcntl, File::Basename, File::CheckTree, File::Compare, File::Copy, -File::Find, File::Path, File::stat, FileCache, FileHandle, FindBin, -GDBM_File, Getopt::Long, Getopt::Std, I18N::Collate, IO, IO::File, -IO::Handle, IO::Pipe, IO::Seekable, IO::Select, IO::Socket, IPC::Open2, -IPC::Open3, Math::BigFloat, Math::BigInt, Math::Complex, Math::Trig, -NDBM_File, Net::Ping, Net::hostent, Net::netent, Net::protoent, -Net::servent, Opcode, Pod::Text, POSIX, SDBM_File, Safe, Search::Dict, -SelectSaver, SelfLoader, Shell, Socket, Symbol, Sys::Hostname, Sys::Syslog, -Term::Cap, Term::Complete, Term::ReadLine, Test::Harness, Text::Abbrev, -Text::ParseWords, Text::Soundex, Text::Tabs, Text::Wrap, Tie::Hash, +AnyDBM_File, AutoLoader, AutoSplit, B, B::Asmdata, B::Assembler, B::Bblock, +B::Bytecode, B::C, B::CC, B::Debug, B::Deparse, B::Disassembler, B::Lint, +B::Showlex, B::Stackobj, B::Terse, B::Xref, Benchmark, ByteLoader, CGI, +CGI::Apache, CGI::Carp, CGI::Cookie, CGI::Fast, CGI::Pretty, CGI::Push, +CGI::Switch, CPAN, CPAN::FirstTime, CPAN::Nox, Carp, Carp::Heavy, +Class::Struct, Config, Cwd, DB, DB_File, Data::Dumper, Devel::DProf, +Devel::Peek, Devel::SelfStubber, DirHandle, Dumpvalue, DynaLoader, English, +Env, Errno, Exporter, Exporter::Heavy, ExtUtils::Command, ExtUtils::Embed, +ExtUtils::Install, ExtUtils::Installed, ExtUtils::Liblist, +ExtUtils::MM_Cygwin, ExtUtils::MM_OS2, ExtUtils::MM_Unix, ExtUtils::MM_VMS, +ExtUtils::MM_Win32, ExtUtils::MakeMaker, 
ExtUtils::Manifest, +ExtUtils::Mkbootstrap, ExtUtils::Mksymlists, ExtUtils::Packlist, +ExtUtils::testlib, Fatal, Fcntl, File::Basename, File::CheckTree, +File::Compare, File::Copy, File::DosGlob, File::Find, File::Glob, +File::Path, File::Spec, File::Spec::Functions, File::Spec::Mac, +File::Spec::OS2, File::Spec::Unix, File::Spec::VMS, File::Spec::Win32, +File::stat, FileCache, FileHandle, FindBin, GDBM_File, Getopt::Long, +Getopt::Std, I18N::Collate, IO, IO::Dir, IO::File, IO::Handle, IO::Pipe, +IO::Poll, IO::Seekable, IO::Select, IO::Socket, IO::Socket::INET, +IO::Socket::UNIX, IPC::Msg, IPC::Open2, IPC::Open3, IPC::Semaphore, +IPC::SysV, Math::BigFloat, Math::BigInt, Math::Complex, Math::Trig, +Net::Ping, Net::hostent, Net::netent, Net::protoent, Net::servent, O, +Opcode, POSIX, Pod::Checker, Pod::Html, Pod::InputObjects, Pod::Man, +Pod::Parser, Pod::Select, Pod::Text, Pod::Text::Color, Pod::Usage, +SDBM_File, Safe, Search::Dict, SelectSaver, SelfLoader, Shell, Socket, +Symbol, Sys::Hostname, Sys::Syslog, Term::Cap, Term::Complete, +Term::ReadLine, Test, Test::Harness, Text::Abbrev, Text::ParseWords, +Text::Soundex, Text::Wrap, Tie::Array, Tie::Handle, Tie::Hash, Tie::RefHash, Tie::Scalar, Tie::SubstrHash, Time::Local, Time::gmtime, Time::localtime, Time::tm, UNIVERSAL, User::grent, User::pwent @@ -1451,8 +2042,8 @@ World Wide Web, HTML, HTTP, CGI, MIME, Server and Daemon Utilities, Archiving and Compression, Images, Pixmap and Bitmap Manipulation, Drawing, and Graphing, Mail and Usenet News, Control Flow Utilities (callbacks and exceptions etc), File Handle and Input/Output Stream Utilities, -Miscellaneous Modules, Africa, Asia, Australasia, Europe, North America, -South America +Miscellaneous Modules, Africa, Asia, Australasia, Central America, Europe, +North America, South America =item Modules: Creation, Use, and Abuse @@ -1491,8 +2082,12 @@ can then be reduced to a small =item NOTE +=back + =head2 perlmodinstall - Installing CPAN Modules +=over + =item 
DESCRIPTION =over @@ -1510,8 +2105,50 @@ module (sometimes unnecessary), B<INSTALL> the module =item COPYRIGHT +=back + +=head2 perlfork - Perl's fork() emulation + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item Behavior of other Perl features in forked pseudo-processes + +$$ or $PROCESS_ID, %ENV, chdir() and all other builtins that accept +filenames, wait() and waitpid(), kill(), exec(), exit(), Open handles to +files, directories and network sockets + +=item Resource limits + +=item Killing the parent process + +=item Lifetime of the parent process and pseudo-processes + +=item CAVEATS AND LIMITATIONS + +BEGIN blocks, Open filehandles, Forking pipe open() not yet implemented, +Global state maintained by XSUBs, Interpreter embedded in larger +application, Thread-safety of extensions + +=back + +=item BUGS + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 perlform - Perl formats +=over + =item DESCRIPTION =over @@ -1532,9 +2169,13 @@ module (sometimes unnecessary), B<INSTALL> the module =item WARNINGS +=back + =head2 perllocale - Perl locale handling (internationalization and localization) +=over + =item DESCRIPTION =item PREPARING TO USE LOCALES @@ -1555,9 +2196,9 @@ localization) =item Permanently fixing locale problems -=item Permanently fixing your locale configuration +=item Permanently fixing your system's locale configuration -=item Permanently fixing system locale configuration +=item Fixing system locale configuration =item The localeconv function @@ -1586,18 +2227,17 @@ localization) B<Comparison operators> (C<lt>, C<le>, C<ge>, C<gt> and C<cmp>):, B<Case-mapping interpolation> (with C<\l>, C<\L>, C<\u> or C<\U>), B<Matching operator> (C<m//>):, B<Substitution operator> (C<s///>):, -B<In-memory formatting function> (sprintf()):, B<Output formatting -functions> (printf() and write()):, B<Case-mapping functions> (lc(), -lcfirst(), uc(), ucfirst()):, B<POSIX locale-dependent functions> -(localeconv(), strcoll(),strftime(), strxfrm()):, 
B<POSIX character class -tests> (isalnum(), isalpha(), isdigit(),isgraph(), islower(), isprint(), -ispunct(), isspace(), isupper(), +B<Output formatting functions> (printf() and write()):, B<Case-mapping +functions> (lc(), lcfirst(), uc(), ucfirst()):, B<POSIX locale-dependent +functions> (localeconv(), strcoll(),strftime(), strxfrm()):, B<POSIX +character class tests> (isalnum(), isalpha(), isdigit(),isgraph(), +islower(), isprint(), ispunct(), isspace(), isupper(), isxdigit()): =item ENVIRONMENT -PERL_BADLANG, LC_ALL, LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, -LC_TIME, LANG +PERL_BADLANG, LC_ALL, LANGUAGE, LC_CTYPE, LC_COLLATE, LC_MONETARY, +LC_NUMERIC, LC_TIME, LANG =item NOTES @@ -1631,8 +2271,14 @@ LC_TIME, LANG =item HISTORY +=back + =head2 perlref - Perl references and nested data structures +=over + +=item NOTE + =item DESCRIPTION =over @@ -1655,8 +2301,52 @@ LC_TIME, LANG =item SEE ALSO +=back + +=head2 perlreftut - Mark's very short tutorial about references + +=over + +=item DESCRIPTION + +=item Who Needs Complicated Data Structures? 
+ +=item The Solution + +=item Syntax + +=over + +=item Making References + +=item Using References + +=back + +=item An Example + +=item Arrow Rule + +=item Solution + +=item The Rest + +=item Summary + +=item Credits + +=over + +=item Distribution Conditions + +=back + +=back + =head2 perldsc - Perl Data Structures Cookbook +=over + =item DESCRIPTION arrays of arrays, hashes of arrays, arrays of hashes, hashes of hashes, @@ -1674,39 +2364,39 @@ more elaborate constructs =item CODE EXAMPLES -=item LISTS OF LISTS +=item ARRAYS OF ARRAYS =over -=item Declaration of a LIST OF LISTS +=item Declaration of a ARRAY OF ARRAYS -=item Generation of a LIST OF LISTS +=item Generation of a ARRAY OF ARRAYS -=item Access and Printing of a LIST OF LISTS +=item Access and Printing of a ARRAY OF ARRAYS =back -=item HASHES OF LISTS +=item HASHES OF ARRAYS =over -=item Declaration of a HASH OF LISTS +=item Declaration of a HASH OF ARRAYS -=item Generation of a HASH OF LISTS +=item Generation of a HASH OF ARRAYS -=item Access and Printing of a HASH OF LISTS +=item Access and Printing of a HASH OF ARRAYS =back -=item LISTS OF HASHES +=item ARRAYS OF HASHES =over -=item Declaration of a LIST OF HASHES +=item Declaration of a ARRAY OF HASHES -=item Generation of a LIST OF HASHES +=item Generation of a ARRAY OF HASHES -=item Access and Printing of a LIST OF HASHES +=item Access and Printing of a ARRAY OF HASHES =back @@ -1740,11 +2430,15 @@ more elaborate constructs =item AUTHOR -=head2 perllol, perlLoL - Manipulating Lists of Lists in Perl +=back + +=head2 perllol - Manipulating Arrays of Arrays in Perl + +=over =item DESCRIPTION -=item Declaration and Access of Lists of Lists +=item Declaration and Access of Arrays of Arrays =item Growing Your Own @@ -1756,8 +2450,70 @@ more elaborate constructs =item AUTHOR +=back + +=head2 perlboot - Beginner's Object-Oriented Tutorial + +=over + +=item DESCRIPTION + +=over + +=item If we could talk to the animals... 
+ +=item Introducing the method invocation arrow + +=item Invoking a barnyard + +=item The extra parameter of method invocation + +=item Calling a second method to simplify things + +=item Inheriting the windpipes + +=item A few notes about @ISA + +=item Overriding the methods + +=item Starting the search from a different place + +=item The SUPER way of doing things + +=item Where we're at so far... + +=item A horse is a horse, of course of course -- or is it? + +=item Invoking an instance method + +=item Accessing the instance data + +=item How to build a horse + +=item Inheriting the constructor + +=item Making a method work with either classes or instances + +=item Adding parameters to a method + +=item More interesting instances + +=item A horse of a different color + +=item Summary + +=back + +=item SEE ALSO + +=item COPYRIGHT + +=back + =head2 perltoot - Tom's object-oriented tutorial for perl +=over + =item DESCRIPTION =item Creating a Class @@ -1852,8 +2608,64 @@ more elaborate constructs =back +=back + +=head2 perltootc - Tom's OO Tutorial for Class Data in Perl + +=over + +=item DESCRIPTION + +=item Class Data as Package Variables + +=over + +=item Putting All Your Eggs in One Basket + +=item Inheritance Concerns + +=item The Eponymous Meta-Object + +=item Indirect References to Class Data + +=item Monadic Classes + +=item Translucent Attributes + +=back + +=item Class Data as Lexical Variables + +=over + +=item Privacy and Responsibility + +=item File-Scoped Lexicals + +=item More Inheritance Concerns + +=item Locking the Door and Throwing Away the Key + +=item Translucency Revisited + +=back + +=item NOTES + +=item SEE ALSO + +=item AUTHOR AND COPYRIGHT + +=item ACKNOWLEDGEMENTS + +=item HISTORY + +=back + =head2 perlobj - Perl objects +=over + =item DESCRIPTION =over @@ -1866,14 +2678,14 @@ more elaborate constructs =item Method Invocation +=item WARNING + =item Default UNIVERSAL methods isa(CLASS), can(METHOD), VERSION( [NEED] ) =item Destructors 
-=item WARNING - =item Summary =item Two-Phased Garbage Collection @@ -1882,8 +2694,12 @@ isa(CLASS), can(METHOD), VERSION( [NEED] ) =item SEE ALSO +=back + =head2 perltie - how to hide an object class in a simple variable +=over + =item SYNOPSIS =item DESCRIPTION @@ -1920,8 +2736,12 @@ LIST, READ this, LIST, READLINE this, GETC this, CLOSE this, DESTROY this =item AUTHOR +=back + =head2 perlbot - Bag'o Object Tricks (the BOT) +=over + =item DESCRIPTION =item OO SCALING TIPS @@ -1946,9 +2766,13 @@ LIST, READ this, LIST, READLINE this, GETC this, CLOSE this, DESTROY this =item DELEGATION +=back + =head2 perlipc - Perl interprocess communication (signals, fifos, pipes, safe subprocesses, sockets, and semaphores) +=over + =item DESCRIPTION =item Signals @@ -2021,8 +2845,39 @@ Proto, LocalPort, Listen, Reuse =item SEE ALSO +=back + +=head2 perldbmfilter - Perl DBM Filters + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +B<filter_store_key>, B<filter_store_value>, B<filter_fetch_key>, +B<filter_fetch_value> + +=over + +=item The Filter + +=item An Example -- the NULL termination problem. + +=item Another Example -- Key is a C int. 
+ +=back + +=item SEE ALSO + +=item AUTHOR + +=back + =head2 perldebug - Perl debugging +=over + =item DESCRIPTION =item The Perl Debugger @@ -2032,24 +2887,28 @@ Proto, LocalPort, Listen, Reuse =item Debugger Commands h [command], p expr, x expr, V [pkg [vars]], X [vars], T, s [expr], n -[expr], E<lt>CRE<gt>, c [line|sub], l, l min+incr, l min-max, l line, l -subname, -, w [line], f filename, /pattern/, ?pattern?, L, S [[!]pattern], -t, t expr, b [line] [condition], b subname [condition], b postpone subname +[expr], r, <CR>, c [line|sub], l, l min+incr, l min-max, l line, l subname, +-, w [line], f filename, /pattern/, ?pattern?, L, S [[!]regex], t, t expr, +b [line] [condition], b subname [condition], b postpone subname [condition], b load filename, b compile subname, d [line], D, a [line] -command, A, W [expr], W, O [opt[=val]] [opt"val"] [opt?].., +command, a [line], A, W expr, W, O booloption .., O anyoption? .., O +option=value .., < ?, < [ command ], << command, > ?, > command, >> +command, { ?, { [ command ], {{ command, ! number, ! -number, ! pattern, !! +cmd, H -number, q or ^D, R, |dbcmd, ||dbcmd, command, m expr, man [manpage] + +=item Configurable Options + C<recallCommand>, C<ShellBang>, C<pager>, C<tkRunning>, C<signalLevel>, C<warnLevel>, C<dieLevel>, C<AutoTrace>, C<LineInfo>, C<inhibit_exit>, C<PrintRet>, C<ornaments>, C<frame>, C<maxTraceLen>, C<arrayDepth>, C<hashDepth>, C<compactDump>, C<veryCompact>, C<globPrint>, C<DumpDBFiles>, C<DumpPackages>, C<DumpReused>, C<quote>, C<HighBit>, C<undefPrint>, -C<UsageOnly>, C<TTY>, C<noTTY>, C<ReadLine>, C<NonStop>, E<lt> [ command ], -E<lt>E<lt> command, E<gt> command, E<gt>E<gt> command, { [ command ], {{ -command, ! number, ! -number, ! pattern, !! 
cmd, H -number, q or ^D, R, -|dbcmd, ||dbcmd, command, m expr, m package +C<UsageOnly>, C<TTY>, C<noTTY>, C<ReadLine>, C<NonStop> =item Debugger input/output -Prompt, Multiline commands, Stack backtrace, Listing, Frame listing +Prompt, Multiline commands, Stack backtrace, Line Listing Format, Frame +listing =item Debugging compile-time statements @@ -2061,38 +2920,59 @@ Prompt, Multiline commands, Stack backtrace, Listing, Frame listing =item The Perl Profiler -=item Debugger support in perl +=back -=item Debugger Internals +=item Debugging regular expressions -=item Other resources +=item Debugging memory usage + +=item SEE ALSO =item BUGS =back -=item Debugging Perl memory usage +=head2 perlnumber - semantics of numbers and numeric operations in Perl =over -=item Using C<$ENV{PERL_DEBUG_MSTATS}> +=item SYNOPSIS -C<buckets SMALLEST(APPROX)..GREATEST(APPROX)>, Free/Used, C<Total sbrk(): -SBRKed/SBRKs:CONTINUOUS>, C<pad: 0>, C<heads: 2192>, C<chain: 0>, C<tail: -6144> +=item DESCRIPTION -=item Example of using B<-DL> switch +=item Storing numbers -C<717>, C<002>, C<054>, C<602>, C<702>, C<704> +=item Numeric operators and numeric conversions -=item B<-DL> details +=item Flavors of Perl numeric operations -C<!!!>, C<!!>, C<!> +Arithmetic operators except, C<no integer>, Arithmetic operators except, +C<use integer>, Bitwise operators, C<no integer>, Bitwise operators, C<use +integer>, Operators which expect an integer, Operators which expect a +string + +=item AUTHOR -=item Limitations of B<-DL> statistic +=item SEE ALSO + +=back + +=head2 perldebguts - Guts of Perl debugging + +=over + +=item DESCRIPTION + +=item Debugger Internals + +=over + +=item Writing Your Own Debugger =back +=item Frame Listing Output Examples + =item Debugging regular expressions =over @@ -2110,12 +2990,44 @@ C<anchored(TYPE)> =back +=item Debugging Perl memory usage + +=over + +=item Using C<$ENV{PERL_DEBUG_MSTATS}> + +C<buckets SMALLEST(APPROX)..GREATEST(APPROX)>, Free/Used, C<Total sbrk(): 
+SBRKed/SBRKs:CONTINUOUS>, C<pad: 0>, C<heads: 2192>, C<chain: 0>, C<tail: +6144> + +=item Example of using B<-DL> switch + +C<717>, C<002>, C<054>, C<602>, C<702>, C<704> + +=item B<-DL> details + +C<!!!>, C<!!>, C<!> + +=item Limitations of B<-DL> statistics + +=back + +=item SEE ALSO + +=back + =head2 perldiag - various Perl diagnostics +=over + =item DESCRIPTION +=back + =head2 perlsec - Perl security +=over + =item DESCRIPTION =over @@ -2134,8 +3046,12 @@ C<anchored(TYPE)> =item SEE ALSO +=back + =head2 perltrap - Perl traps for the unwary +=over + =item DESCRIPTION =over @@ -2161,7 +3077,7 @@ Subroutine, Signal, Sorting Traps, OS Traps, DBM Traps, Unclassified Traps Discontinuance, Deprecation, BugFix, Discontinuance, Discontinuance, Discontinuance, BugFix, Discontinuance, Discontinuance, BugFix, -Discontinuance, Discontinuance, Deprecation, Discontinuance +Discontinuance, Deprecation, Discontinuance =item Parsing Traps @@ -2169,7 +3085,7 @@ Parsing, Parsing, Parsing, Parsing =item Numerical Traps -Numerical, Numerical, Numerical +Numerical, Numerical, Numerical, Bitwise string ops =item General data type traps @@ -2215,11 +3131,15 @@ LIMIT specified =back +=back + =head2 perlport - Writing portable Perl +=over + =item DESCRIPTION -Not all Perl programs have to be portable, The vast majority of Perl B<is> +Not all Perl programs have to be portable, Nearly all of Perl already I<is> portable =item ISSUES @@ -2228,7 +3148,9 @@ portable =item Newlines -=item File Paths +=item Numbers endianness and Width + +=item Files and Filesystems =item System Interaction @@ -2240,6 +3162,10 @@ portable =item Time and Date +=item Character sets and character encoding + +=item Internationalisation + =item System Resources =item Security @@ -2248,10 +3174,10 @@ portable =back -=item CPAN TESTERS +=item CPAN Testers Mailing list: cpan-testers@perl.org, Testing results: -C<http://www.connect.net/gbarr/cpan-test/> +http://testers.cpan.org/ =item PLATFORMS @@ -2261,32 +3187,20 @@ 
C<http://www.connect.net/gbarr/cpan-test/> =item DOS and Derivatives -The djgpp environment for DOS, C<http://www.delorie.com/djgpp/>, The EMX -environment for DOS, OS/2, etc. -C<emx@iaehv.nl>,C<http://www.leo.org/pub/comp/os/os2/leo/gnu/emx+gcc/index.html>, -C<ftp://hobbes.nmsu.edu/pub/os2/dev/emx>. Build instructions -for Win32, L<perlwin32>, The ActiveState Pages, -C<http://www.activestate.com/> - -=item MacPerl +Build instructions for OS/2, L<perlos2> -The MacPerl Pages, C<http://www.ptf.com/macperl/>, The MacPerl mailing -list, C<mac-perl-request@iis.ee.ethz.ch> +=item S<Mac OS> =item VMS -L<perlvms.pod>, vmsperl list, C<vmsperl-request@newman.upenn.edu>, vmsperl -on the web, C<http://www.sidhe.org/vmsperl/index.html> +=item VOS =item EBCDIC Platforms -perl-mvs list, AS/400 Perl information at C<http://as400.rochester.ibm.com> +=item Acorn RISC OS =item Other perls -Atari, Guido Flohr's page C<http://stud.uni-sb.de/~gufl0000/>, HP 300 -MPE/iX C<http://www.cccd.edu/~markb/perlix.html>, Novell Netware - =back =item FUNCTION IMPLEMENTATIONS @@ -2295,42 +3209,65 @@ MPE/iX C<http://www.cccd.edu/~markb/perlix.html>, Novell Netware =item Alphabetical Listing of Perl Functions --I<X> FILEHANDLE, -I<X> EXPR, -I<X>, binmode FILEHANDLE, chmod LIST, chown -LIST, chroot FILENAME, chroot, crypt PLAINTEXT,SALT, dbmclose HASH, dbmopen -HASH,DBNAME,MODE, dump LABEL, exec LIST, fcntl FILEHANDLE,FUNCTION,SCALAR, -flock FILEHANDLE,OPERATION, fork, getlogin, getpgrp PID, getppid, -getpriority WHICH,WHO, getpwnam NAME, getgrnam NAME, getnetbyname NAME, -getpwuid UID, getgrgid GID, getnetbyaddr ADDR,ADDRTYPE, getprotobynumber -NUMBER, getservbyport PORT,PROTO, getpwent, getgrent, gethostent, -getnetent, getprotoent, getservent, setpwent, setgrent, sethostent -STAYOPEN, setnetent STAYOPEN, setprotoent STAYOPEN, setservent STAYOPEN, -endpwent, endgrent, endhostent, endnetent, endprotoent, endservent, -getsockopt SOCKET,LEVEL,OPTNAME, glob EXPR, glob, ioctl -FILEHANDLE,FUNCTION,SCALAR, 
kill LIST, link OLDFILE,NEWFILE, lstat -FILEHANDLE, lstat EXPR, lstat, msgctl ID,CMD,ARG, msgget KEY,FLAGS, msgsnd -ID,MSG,FLAGS, msgrcv ID,VAR,SIZE,TYPE,FLAGS, open FILEHANDLE,EXPR, open -FILEHANDLE, pipe READHANDLE,WRITEHANDLE, readlink EXPR, readlink, select -RBITS,WBITS,EBITS,TIMEOUT, semctl ID,SEMNUM,CMD,ARG, semget -KEY,NSEMS,FLAGS, semop KEY,OPSTRING, setpgrp PID,PGRP, setpriority -WHICH,WHO,PRIORITY, setsockopt SOCKET,LEVEL,OPTNAME,OPTVAL, shmctl -ID,CMD,ARG, shmget KEY,SIZE,FLAGS, shmread ID,VAR,POS,SIZE, shmwrite -ID,STRING,POS,SIZE, socketpair SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL, stat -FILEHANDLE, stat EXPR, stat, symlink OLDFILE,NEWFILE, syscall LIST, system -LIST, times, truncate FILEHANDLE,LENGTH, truncate EXPR,LENGTH, umask EXPR, -umask, utime LIST, wait, waitpid PID,FLAGS +-I<X> FILEHANDLE, -I<X> EXPR, -I<X>, alarm SECONDS, alarm, binmode +FILEHANDLE, chmod LIST, chown LIST, chroot FILENAME, chroot, crypt +PLAINTEXT,SALT, dbmclose HASH, dbmopen HASH,DBNAME,MODE, dump LABEL, exec +LIST, fcntl FILEHANDLE,FUNCTION,SCALAR, flock FILEHANDLE,OPERATION, fork, +getlogin, getpgrp PID, getppid, getpriority WHICH,WHO, getpwnam NAME, +getgrnam NAME, getnetbyname NAME, getpwuid UID, getgrgid GID, getnetbyaddr +ADDR,ADDRTYPE, getprotobynumber NUMBER, getservbyport PORT,PROTO, getpwent, +getgrent, gethostent, getnetent, getprotoent, getservent, setpwent, +setgrent, sethostent STAYOPEN, setnetent STAYOPEN, setprotoent STAYOPEN, +setservent STAYOPEN, endpwent, endgrent, endhostent, endnetent, +endprotoent, endservent, getsockopt SOCKET,LEVEL,OPTNAME, glob EXPR, glob, +ioctl FILEHANDLE,FUNCTION,SCALAR, kill SIGNAL, LIST, link OLDFILE,NEWFILE, +lstat FILEHANDLE, lstat EXPR, lstat, msgctl ID,CMD,ARG, msgget KEY,FLAGS, +msgsnd ID,MSG,FLAGS, msgrcv ID,VAR,SIZE,TYPE,FLAGS, open FILEHANDLE,EXPR, +open FILEHANDLE, pipe READHANDLE,WRITEHANDLE, readlink EXPR, readlink, +select RBITS,WBITS,EBITS,TIMEOUT, semctl ID,SEMNUM,CMD,ARG, semget +KEY,NSEMS,FLAGS, semop KEY,OPSTRING, 
setgrent, setpgrp PID,PGRP, +setpriority WHICH,WHO,PRIORITY, setpwent, setsockopt +SOCKET,LEVEL,OPTNAME,OPTVAL, shmctl ID,CMD,ARG, shmget KEY,SIZE,FLAGS, +shmread ID,VAR,POS,SIZE, shmwrite ID,STRING,POS,SIZE, socketpair +SOCKET1,SOCKET2,DOMAIN,TYPE,PROTOCOL, stat FILEHANDLE, stat EXPR, stat, +symlink OLDFILE,NEWFILE, syscall LIST, sysopen +FILEHANDLE,FILENAME,MODE,PERMS, system LIST, times, truncate +FILEHANDLE,LENGTH, truncate EXPR,LENGTH, umask EXPR, umask, utime LIST, +wait, waitpid PID,FLAGS =back +=item CHANGES + +v1.47, 22 March 2000, v1.46, 12 February 2000, v1.45, 20 December 1999, +v1.44, 19 July 1999, v1.43, 24 May 1999, v1.42, 22 May 1999, v1.41, 19 May +1999, v1.40, 11 April 1999, v1.39, 11 February 1999, v1.38, 31 December +1998, v1.37, 19 December 1998, v1.36, 9 September 1998, v1.35, 13 August +1998, v1.33, 06 August 1998, v1.32, 05 August 1998, v1.30, 03 August 1998, +v1.23, 10 July 1998 + +=item Supported Platforms + +=item SEE ALSO + =item AUTHORS / CONTRIBUTORS =item VERSION +=back + =head2 perlstyle - Perl style guide +=over + =item DESCRIPTION +=back + =head2 perlpod - plain old documentation +=over + =item DESCRIPTION =over @@ -2353,12 +3290,20 @@ umask, utime LIST, wait, waitpid PID,FLAGS =item AUTHOR +=back + =head2 perlbook - Perl book information +=over + =item DESCRIPTION +=back + =head2 perlembed - how to embed perl in your C program +=over + =item DESCRIPTION =over @@ -2399,8 +3344,12 @@ program =item COPYRIGHT +=back + =head2 perlapio - perl's IO abstraction interface. 
+=over + =item SYNOPSIS =item DESCRIPTION @@ -2429,8 +3378,12 @@ B<PerlIO_get_base(f)>, B<PerlIO_get_bufsiz(f)> =back +=back + =head2 perlxs - XS language reference manual +=over + =item DESCRIPTION =over @@ -2519,59 +3472,99 @@ B<PerlIO_get_base(f)>, B<PerlIO_get_bufsiz(f)> =item AUTHOR -=head2 perlxstut, perlXStut - Tutorial for XSUBs +=back + +=head2 perlxstut, perlXStut - Tutorial for writing XSUBs + +=over =item DESCRIPTION +=item SPECIAL NOTES + =over -=item VERSION CAVEAT +=item make + +=item Version caveat + +=item Dynamic Loading versus Static Loading + +=back + +=item TUTORIAL -=item DYNAMIC VERSUS STATIC +=over =item EXAMPLE 1 =item EXAMPLE 2 -=item WHAT HAS GONE ON? +=item What has gone on? -=item WRITING GOOD TEST SCRIPTS +=item Writing good test scripts =item EXAMPLE 3 -=item WHAT'S NEW HERE? +=item What's new here? -=item INPUT AND OUTPUT PARAMETERS +=item Input and Output Parameters -=item THE XSUBPP COMPILER +=item The XSUBPP Program -=item THE TYPEMAP FILE +=item The TYPEMAP file -=item WARNING +=item Warning about Output Arguments =item EXAMPLE 4 -=item WHAT HAS HAPPENED HERE? +=item What has happened here? 
-=item SPECIFYING ARGUMENTS TO XSUBPP +=item Anatomy of .xs file -=item THE ARGUMENT STACK +=item Getting the fat out of XSUBs -=item EXTENDING YOUR EXTENSION +=item More about XSUB arguments -=item DOCUMENTING YOUR EXTENSION +=item The Argument Stack -=item INSTALLING YOUR EXTENSION +=item Extending your Extension -=item SEE ALSO +=item Documenting your Extension + +=item Installing your Extension + +=item EXAMPLE 5 + +=item New Things in this Example + +=item EXAMPLE 6 (Coming Soon) + +=item EXAMPLE 7 (Coming Soon) + +=item EXAMPLE 8 (Coming Soon) + +=item EXAMPLE 9 (Coming Soon) + +=item Troubleshooting these Examples + +=back + +=item See also =item Author +=over + =item Last Changed =back -=head2 perlguts - Perl's Internal Functions +=back + +=head2 perlguts - Introduction to the Perl API + +=over =item DESCRIPTION @@ -2620,11 +3613,12 @@ B<PerlIO_get_base(f)>, B<PerlIO_get_bufsiz(f)> C<SAVEINT(int i)>, C<SAVEIV(IV i)>, C<SAVEI32(I32 i)>, C<SAVELONG(long i)>, C<SAVESPTR(s)>, C<SAVEPPTR(p)>, C<SAVEFREESV(SV *sv)>, C<SAVEFREEOP(OP *op)>, C<SAVEFREEPV(p)>, C<SAVECLEARSV(SV *sv)>, C<SAVEDELETE(HV *hv, char -*key, I32 length)>, C<SAVEDESTRUCTOR(f,p)>, C<SAVESTACK_POS()>, C<SV* -save_scalar(GV *gv)>, C<AV* save_ary(GV *gv)>, C<HV* save_hash(GV *gv)>, -C<void save_item(SV *item)>, C<void save_list(SV **sarg, I32 maxsarg)>, -C<SV* save_svref(SV **sptr)>, C<void save_aptr(AV **aptr)>, C<void -save_hptr(HV **hptr)> +*key, I32 length)>, C<SAVEDESTRUCTOR(DESTRUCTORFUNC_NOCONTEXT_t f, void +*p)>, C<SAVEDESTRUCTOR_X(DESTRUCTORFUNC_t f, void *p)>, C<SAVESTACK_POS()>, +C<SV* save_scalar(GV *gv)>, C<AV* save_ary(GV *gv)>, C<HV* save_hash(GV +*gv)>, C<void save_item(SV *item)>, C<void save_list(SV **sarg, I32 +maxsarg)>, C<SV* save_svref(SV **sptr)>, C<void save_aptr(AV **aptr)>, +C<void save_hptr(HV **hptr)> =back @@ -2666,67 +3660,35 @@ save_hptr(HV **hptr)> =back -=item API LISTING - -av_clear, av_extend, av_fetch, AvFILL, av_len, av_make, av_pop, av_push, -av_shift, 
av_store, av_undef, av_unshift, CLASS, Copy, croak, CvSTASH, -PL_DBsingle, PL_DBsub, PL_DBtrace, dMARK, dORIGMARK, PL_dowarn, dSP, -dXSARGS, dXSI32, do_binmode, ENTER, EXTEND, fbm_compile, fbm_instr, -FREETMPS, G_ARRAY, G_DISCARD, G_EVAL, GIMME, GIMME_V, G_NOARGS, G_SCALAR, -gv_fetchmeth, gv_fetchmethod, gv_fetchmethod_autoload, G_VOID, gv_stashpv, -gv_stashsv, GvSV, HEf_SVKEY, HeHASH, HeKEY, HeKLEN, HePV, HeSVKEY, -HeSVKEY_force, HeSVKEY_set, HeVAL, hv_clear, hv_delayfree_ent, hv_delete, -hv_delete_ent, hv_exists, hv_exists_ent, hv_fetch, hv_fetch_ent, -hv_free_ent, hv_iterinit, hv_iterkey, hv_iterkeysv, hv_iternext, -hv_iternextsv, hv_iterval, hv_magic, HvNAME, hv_store, hv_store_ent, -hv_undef, isALNUM, isALPHA, isDIGIT, isLOWER, isSPACE, isUPPER, items, ix, -LEAVE, looks_like_number, MARK, mg_clear, mg_copy, mg_find, mg_free, -mg_get, mg_len, mg_magical, mg_set, Move, PL_na, New, newAV, Newc, -newCONSTSUB, newHV, newRV_inc, newRV_noinc, NEWSV, newSViv, newSVnv, -newSVpv, newSVpvf, newSVpvn, newSVrv, newSVsv, newXS, newXSproto, Newz, -Nullav, Nullch, Nullcv, Nullhv, Nullsv, ORIGMARK, perl_alloc, -perl_call_argv, perl_call_method, perl_call_pv, perl_call_sv, -perl_construct, perl_destruct, perl_eval_sv, perl_eval_pv, perl_free, -perl_get_av, perl_get_cv, perl_get_hv, perl_get_sv, perl_parse, -perl_require_pv, perl_run, POPi, POPl, POPp, POPn, POPs, PUSHMARK, PUSHi, -PUSHn, PUSHp, PUSHs, PUSHu, PUTBACK, Renew, Renewc, RETVAL, safefree, -safemalloc, saferealloc, savepv, savepvn, SAVETMPS, SP, SPAGAIN, ST, strEQ, -strGE, strGT, strLE, strLT, strNE, strnEQ, strnNE, sv_2mortal, sv_bless, -sv_catpv, sv_catpv_mg, sv_catpvn, sv_catpvn_mg, sv_catpvf, sv_catpvf_mg, -sv_catsv, sv_catsv_mg, sv_chop, sv_cmp, SvCUR, SvCUR_set, sv_dec, -sv_derived_from, sv_derived_from, SvEND, sv_eq, SvGETMAGIC, SvGROW, -sv_grow, sv_inc, sv_insert, SvIOK, SvIOK_off, SvIOK_on, SvIOK_only, SvIOKp, -sv_isa, sv_isobject, SvIV, SvIVX, SvLEN, sv_len, sv_magic, sv_mortalcopy, -sv_newmortal, SvNIOK, 
SvNIOK_off, SvNIOKp, PL_sv_no, SvNOK, SvNOK_off, -SvNOK_on, SvNOK_only, SvNOKp, SvNV, SvNVX, SvOK, SvOOK, SvPOK, SvPOK_off, -SvPOK_on, SvPOK_only, SvPOKp, SvPV, SvPV_force, SvPVX, SvREFCNT, -SvREFCNT_dec, SvREFCNT_inc, SvROK, SvROK_off, SvROK_on, SvRV, SvSETMAGIC, -sv_setiv, sv_setiv_mg, sv_setnv, sv_setnv_mg, sv_setpv, sv_setpv_mg, -sv_setpviv, sv_setpviv_mg, sv_setpvn, sv_setpvn_mg, sv_setpvf, -sv_setpvf_mg, sv_setref_iv, sv_setref_nv, sv_setref_pv, sv_setref_pvn, -SvSetSV, SvSetSV_nosteal, sv_setsv, sv_setsv_mg, sv_setuv, sv_setuv_mg, -SvSTASH, SvTAINT, SvTAINTED, SvTAINTED_off, SvTAINTED_on, SVt_IV, SVt_PV, -SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, SVt_NV, SvTRUE, SvTYPE, svtype, -PL_sv_undef, sv_unref, SvUPGRADE, sv_upgrade, sv_usepvn, sv_usepvn_mg, -sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, used_locale), -sv_vsetpvfn(sv, pat, patlen, args, svargs, svmax, used_locale), SvUV, -SvUVX, PL_sv_yes, THIS, toLOWER, toUPPER, warn, XPUSHi, XPUSHn, XPUSHp, -XPUSHs, XPUSHu, XS, XSRETURN, XSRETURN_EMPTY, XSRETURN_IV, XSRETURN_NO, -XSRETURN_NV, XSRETURN_PV, XSRETURN_UNDEF, XSRETURN_YES, XST_mIV, XST_mNV, -XST_mNO, XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, XS_VERSION_BOOTCHECK, -Zero +=item How multiple interpreters and concurrency are supported + +=over + +=item Background and PERL_IMPLICIT_CONTEXT + +=item How do I use all this in extensions? 
+ +=item Future Plans and PERL_IMPLICIT_SYS + +=back =item AUTHORS +=item SEE ALSO + +=back + =head2 perlcall - Perl calling conventions from C +=over + =item DESCRIPTION An Error Handler, An Event Driven Program -=item THE PERL_CALL FUNCTIONS +=item THE CALL_ FUNCTIONS -B<perl_call_sv>, B<perl_call_pv>, B<perl_call_method>, B<perl_call_argv> +call_sv, call_pv, call_method, call_argv =item FLAG VALUES @@ -2772,11 +3734,11 @@ B<perl_call_sv>, B<perl_call_pv>, B<perl_call_method>, B<perl_call_argv> =item Using G_KEEPERR -=item Using perl_call_sv +=item Using call_sv -=item Using perl_call_argv +=item Using call_argv -=item Using perl_call_method +=item Using call_method =item Using GIMME_V @@ -2800,8 +3762,125 @@ callback =item DATE +=back + +=head2 perlcompile - Introduction to the Perl Compiler-Translator + +=over + +=item DESCRIPTION + +=over + +=item Layout + +B::Bytecode, B::C, B::CC, B::Lint, B::Deparse, B::Xref + +=back + +=item Using The Back Ends + +=over + +=item The Cross Referencing Back End + +i, &, s, r + +=item The Decompiling Back End + +=item The Lint Back End + +=item The Simple C Back End + +=item The Bytecode Back End + +=item The Optimized C Back End + +B, O, B::Asmdata, B::Assembler, B::Bblock, B::Bytecode, B::C, B::CC, +B::Debug, B::Deparse, B::Disassembler, B::Lint, B::Showlex, B::Stackobj, +B::Stash, B::Terse, B::Xref + +=back + +=item KNOWN PROBLEMS + +=item AUTHOR + +=back + +=head2 perlapi - autogenerated documentation for the perl public API + +=over + +=item DESCRIPTION + +AvFILL, av_clear, av_extend, av_fetch, av_len, av_make, av_pop, av_push, +av_shift, av_store, av_undef, av_unshift, call_argv, call_method, call_pv, +call_sv, CLASS, Copy, croak, CvSTASH, dMARK, dORIGMARK, dSP, dXSARGS, +dXSI32, ENTER, eval_pv, eval_sv, EXTEND, fbm_compile, fbm_instr, FREETMPS, +get_av, get_cv, get_hv, get_sv, GIMME, GIMME_V, GvSV, gv_fetchmeth, +gv_fetchmethod, gv_fetchmethod_autoload, gv_stashpv, gv_stashsv, G_ARRAY, +G_DISCARD, G_EVAL, G_NOARGS, 
G_SCALAR, G_VOID, HEf_SVKEY, HeHASH, HeKEY, +HeKLEN, HePV, HeSVKEY, HeSVKEY_force, HeSVKEY_set, HeVAL, HvNAME, hv_clear, +hv_delete, hv_delete_ent, hv_exists, hv_exists_ent, hv_fetch, hv_fetch_ent, +hv_iterinit, hv_iterkey, hv_iterkeysv, hv_iternext, hv_iternextsv, +hv_iterval, hv_magic, hv_store, hv_store_ent, hv_undef, isALNUM, isALPHA, +isDIGIT, isLOWER, isSPACE, isUPPER, items, ix, LEAVE, looks_like_number, +MARK, mg_clear, mg_copy, mg_find, mg_free, mg_get, mg_length, mg_magical, +mg_set, Move, New, newAV, Newc, newCONSTSUB, newHV, newRV_inc, newRV_noinc, +NEWSV, newSViv, newSVnv, newSVpv, newSVpvf, newSVpvn, newSVrv, newSVsv, +newSVuv, newXS, newXSproto, Newz, Nullav, Nullch, Nullcv, Nullhv, Nullsv, +ORIGMARK, perl_alloc, perl_construct, perl_destruct, perl_free, perl_parse, +perl_run, PL_DBsingle, PL_DBsub, PL_DBtrace, PL_dowarn, PL_modglobal, +PL_na, PL_sv_no, PL_sv_undef, PL_sv_yes, POPi, POPl, POPn, POPp, POPs, +PUSHi, PUSHMARK, PUSHn, PUSHp, PUSHs, PUSHu, PUTBACK, Renew, Renewc, +require_pv, RETVAL, Safefree, savepv, savepvn, SAVETMPS, SP, SPAGAIN, ST, +strEQ, strGE, strGT, strLE, strLT, strNE, strnEQ, strnNE, StructCopy, +SvCUR, SvCUR_set, SvEND, SvGETMAGIC, SvGROW, SvIOK, SvIOKp, SvIOK_off, +SvIOK_on, SvIOK_only, SvIV, SvIVX, SvLEN, SvNIOK, SvNIOKp, SvNIOK_off, +SvNOK, SvNOKp, SvNOK_off, SvNOK_on, SvNOK_only, SvNV, SvNVX, SvOK, SvOOK, +SvPOK, SvPOKp, SvPOK_off, SvPOK_on, SvPOK_only, SvPV, SvPVX, SvPV_force, +SvPV_nolen, SvREFCNT, SvREFCNT_dec, SvREFCNT_inc, SvROK, SvROK_off, +SvROK_on, SvRV, SvSETMAGIC, SvSetSV, SvSetSV_nosteal, SvSTASH, SvTAINT, +SvTAINTED, SvTAINTED_off, SvTAINTED_on, SvTRUE, SvTYPE, svtype, SVt_IV, +SVt_NV, SVt_PV, SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, SvUPGRADE, SvUV, +SvUVX, sv_2mortal, sv_bless, sv_catpv, sv_catpvf, sv_catpvf_mg, sv_catpvn, +sv_catpvn_mg, sv_catpv_mg, sv_catsv, sv_catsv_mg, sv_chop, sv_cmp, sv_dec, +sv_derived_from, sv_eq, sv_grow, sv_inc, sv_insert, sv_isa, sv_isobject, +sv_len, sv_magic, sv_mortalcopy, 
sv_newmortal, sv_setiv, sv_setiv_mg, +sv_setnv, sv_setnv_mg, sv_setpv, sv_setpvf, sv_setpvf_mg, sv_setpviv, +sv_setpviv_mg, sv_setpvn, sv_setpvn_mg, sv_setpv_mg, sv_setref_iv, +sv_setref_nv, sv_setref_pv, sv_setref_pvn, sv_setsv, sv_setsv_mg, sv_setuv, +sv_setuv_mg, sv_unref, sv_upgrade, sv_usepvn, sv_usepvn_mg, sv_vcatpvfn, +sv_vsetpvfn, THIS, toLOWER, toUPPER, warn, XPUSHi, XPUSHn, XPUSHp, XPUSHs, +XPUSHu, XS, XSRETURN, XSRETURN_EMPTY, XSRETURN_IV, XSRETURN_NO, +XSRETURN_NV, XSRETURN_PV, XSRETURN_UNDEF, XSRETURN_YES, XST_mIV, XST_mNO, +XST_mNV, XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, XS_VERSION_BOOTCHECK, +Zero + +=item AUTHORS + +=item SEE ALSO + +=back + +=head2 perlintern - autogenerated documentation of purely B<internal> + Perl functions + +=over + +=item DESCRIPTION + +=item AUTHORS + +=item SEE ALSO + +=back + =head2 perlhist - the Perl history records +=over + =item DESCRIPTION =item INTRODUCTION @@ -2826,9 +3905,13 @@ callback =item THE KEEPERS OF THE RECORDS +=back + =head1 PRAGMA DOCUMENTATION -=head2 attrs - set/get attributes of a subroutine +=head2 attrs - set/get attributes of a subroutine (deprecated) + +=over =item SYNOPSIS @@ -2836,14 +3919,78 @@ callback method, locked +=back + =head2 re - Perl pragma to alter regular expression behaviour +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 attributes - get/set subroutine or variable attributes + +=over + =item SYNOPSIS =item DESCRIPTION +=over + +=item Built-in Attributes + +locked, method, lvalue + +=item Available Subroutines + +get, reftype + +=item Package-specific Attribute Handling + +FETCH_I<type>_ATTRIBUTES, MODIFY_I<type>_ATTRIBUTES + +=item Syntax of Attribute Lists + +=back + +=item EXPORTS + +=over + +=item Default exports + +=item Available exports + +=item Export tags defined + +=back + +=item EXAMPLES + +=item SEE ALSO + +=back + +=head2 attrs - set/get attributes of a subroutine (deprecated) + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +method, locked + 
+=back + =head2 autouse - postpone load of modules until a function is used +=over + =item SYNOPSIS =item DESCRIPTION @@ -2854,16 +4001,26 @@ method, locked =item SEE ALSO +=back + =head2 base - Establish IS-A relationship with base class at compile time +=over + =item SYNOPSIS =item DESCRIPTION +=item HISTORY + =item SEE ALSO +=back + =head2 blib - Use MakeMaker's uninstalled version of a package +=over + =item SYNOPSIS =item DESCRIPTION @@ -2872,8 +4029,40 @@ method, locked =item AUTHOR +=back + +=head2 bytes - Perl pragma to force byte semantics rather than character +semantics + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item SEE ALSO + +=back + +=head2 charnames - define character names for C<\N{named}> string literal +escape. + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CUSTOM TRANSLATORS + +=item BUGS + +=back + =head2 constant - Perl pragma to declare constants +=over + =item SYNOPSIS =item DESCRIPTION @@ -2888,9 +4077,13 @@ method, locked =item COPYRIGHT +=back + =head2 diagnostics - Perl compiler pragma to force verbose warning diagnostics +=over + =item SYNOPSIS =item DESCRIPTION @@ -2911,40 +4104,74 @@ diagnostics =item AUTHOR +=back + =head2 fields - compile-time class fields +=over + =item SYNOPSIS =item DESCRIPTION +new, phash + =item SEE ALSO +=back + +=head2 filetest - Perl pragma to control the filetest permission operators + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item subpragma access + +=back + +=back + =head2 integer - Perl pragma to compute arithmetic in integer instead of double +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 less - perl pragma to request less of something from the compiler +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 lib - manipulate @INC at compile time +=over + =item SYNOPSIS =item DESCRIPTION =over -=item ADDING DIRECTORIES TO @INC +=item Adding directories to @INC -=item DELETING DIRECTORIES FROM @INC +=item Deleting directories from @INC -=item RESTORING 
ORIGINAL @INC +=item Restoring original @INC =back @@ -2952,18 +4179,50 @@ double =item AUTHOR +=back + =head2 locale - Perl pragma to use and avoid POSIX locales for built-in operations +=over + =item SYNOPSIS =item DESCRIPTION -=head2 overload - Package for overloading perl operations +=back + +=head2 open - perl pragma to set default disciplines for input and output + +=over =item SYNOPSIS -=item CAVEAT SCRIPTOR +=item DESCRIPTION + +=item UNIMPLEMENTED FUNCTIONALITY + +=item SEE ALSO + +=back + +=head2 ops - Perl pragma to restrict unsafe operations when compiling + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item SEE ALSO + +=back + +=head2 overload - Package for overloading perl operations + +=over + +=item SYNOPSIS =item DESCRIPTION @@ -2977,11 +4236,15 @@ FALSE, TRUE, C<undef> =item Calling Conventions for Unary Operations +=item Calling Conventions for Mutators + +C<++> and C<-->, C<x=> and other assignment versions + =item Overloadable Operations I<Arithmetic operations>, I<Comparison operations>, I<Bit operations>, I<Increment and decrement>, I<Transcendental functions>, I<Boolean, string -and numeric conversion>, I<Special> +and numeric conversion>, I<Iteration>, I<Dereferencing>, I<Special> =item Inheritance and overloading @@ -3010,9 +4273,10 @@ B<Example> I<Assignment forms of arithmetic operations>, I<Conversion operations>, I<Increment and decrement>, C<abs($a)>, I<Unary minus>, I<Negation>, -I<Concatenation>, I<Comparison operations>, I<Copy operator> +I<Concatenation>, I<Comparison operations>, I<Iterator>, I<Dereferencing>, +I<Copy operator> -=item WARNING +=item Losing overloading =item Run-time Overloading @@ -3026,14 +4290,44 @@ integer, float, binary, q, qr =item IMPLEMENTATION +=item Metaphor clash + +=item Cookbook + +=over + +=item Two-face scalars + +=item Two-face references + +=item Symbolic calculator + +=item I<Really> symbolic calculator + +=back + =item AUTHOR =item DIAGNOSTICS =item BUGS +=back + +=head2 re - Perl pragma to 
alter regular expression behaviour + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + =head2 sigtrap - Perl pragma to enable simple signal handling +=over + =item SYNOPSIS =item DESCRIPTION @@ -3058,30 +4352,71 @@ B<untrapped>, B<any>, I<signal>, I<number> =item EXAMPLES +=back + =head2 strict - Perl pragma to restrict unsafe constructs +=over + =item SYNOPSIS =item DESCRIPTION C<strict refs>, C<strict vars>, C<strict subs> +=back + =head2 subs - Perl pragma to predeclare sub names +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 utf8 - Perl pragma to enable/disable UTF-8 in source code + +=over + =item SYNOPSIS =item DESCRIPTION -=head2 vars - Perl pragma to predeclare global variable names +=item SEE ALSO + +=back + +=head2 vars - Perl pragma to predeclare global variable names (obsolete) + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 warnings - Perl pragma to control optional warnings + +=over =item SYNOPSIS =item DESCRIPTION +use warnings::register, warnings::enabled([$category]), +warnings::warn([$category,] $message) + +=back + =head1 MODULE DOCUMENTATION =head2 AnyDBM_File - provide framework for multiple DBMs +=over + =item SYNOPSIS =item DESCRIPTION @@ -3096,8 +4431,12 @@ C<strict refs>, C<strict vars>, C<strict subs> =item SEE ALSO +=back + =head2 AutoLoader - load subroutines only on demand +=over + =item SYNOPSIS =item DESCRIPTION @@ -3120,8 +4459,12 @@ C<strict refs>, C<strict vars>, C<strict subs> =item SEE ALSO +=back + =head2 AutoSplit - split a package for autoloading +=over + =item SYNOPSIS =item DESCRIPTION @@ -3136,8 +4479,12 @@ $keep, $check, $modtime =item DIAGNOSTICS +=back + =head2 B - The Perl Compiler +=over + =item SYNOPSIS =item DESCRIPTION @@ -3186,8 +4533,8 @@ USEFUL, PREVIOUS, RARE, TABLE =item B::GV METHODS -NAME, STASH, SV, IO, FORM, AV, HV, EGV, CV, CVGEN, LINE, FILEGV, GvREFCNT, -FLAGS +is_empty, NAME, STASH, SV, IO, FORM, AV, HV, EGV, CV, CVGEN, LINE, FILE, +FILEGV, GvREFCNT, FLAGS 
=item B::IO METHODS @@ -3200,7 +4547,8 @@ FILL, MAX, OFF, ARRAY, AvFLAGS =item B::CV METHODS -STASH, START, ROOT, GV, FILEGV, DEPTH, PADLIST, OUTSIDE, XSUB, XSUBANY +STASH, START, ROOT, GV, FILE, DEPTH, PADLIST, OUTSIDE, XSUB, XSUBANY, +CvFLAGS =item B::HV METHODS @@ -3210,7 +4558,7 @@ FILL, MAX, KEYS, RITER, NAME, PMROOT, ARRAY =item B::OP METHODS -next, sibling, ppaddr, desc, targ, type, seq, flags, private +next, sibling, name, ppaddr, desc, targ, type, seq, flags, private =item B::UNOP METHOD @@ -3224,10 +4572,6 @@ last other -=item B::CONDOP METHODS - -true, false - =item B::LISTOP METHOD children @@ -3238,11 +4582,11 @@ pmreplroot, pmreplstart, pmnext, pmregexp, pmflags, pmpermflags, precomp =item B::SVOP METHOD -sv +sv, gv -=item B::GVOP METHOD +=item B::PADOP METHOD -gv +padix =item B::PVOP METHOD @@ -3254,62 +4598,84 @@ redoop, nextop, lastop =item B::COP METHODS -label, stash, filegv, cop_seq, arybase, line +label, stash, file, cop_seq, arybase, line =back =item FUNCTIONS EXPORTED BY C<B> -main_cv, main_root, main_start, comppadlist, sv_undef, sv_yes, sv_no, -walkoptree(OP, METHOD), walkoptree_debug(DEBUG), walksymtable(SYMREF, -METHOD, RECURSE), svref_2object(SV), ppname(OPNUM), hash(STR), cast_I32(I), -minus_c, cstring(STR), class(OBJ), threadsv_names, byteload_fh(FILEHANDLE) +main_cv, init_av, main_root, main_start, comppadlist, sv_undef, sv_yes, +sv_no, amagic_generation, walkoptree(OP, METHOD), walkoptree_debug(DEBUG), +walksymtable(SYMREF, METHOD, RECURSE), svref_2object(SV), ppname(OPNUM), +hash(STR), cast_I32(I), minus_c, cstring(STR), class(OBJ), threadsv_names =item AUTHOR +=back + =head2 B::Asmdata - Autogenerated data about Perl ops, used to generate bytecode +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Assembler - Assemble Perl bytecode +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Bblock - Walk basic blocks +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 
B::Bytecode - Perl compiler's bytecode backend +=over + =item SYNOPSIS =item DESCRIPTION =item OPTIONS -B<-ofilename>, B<-->, B<-f>, B<-fcompress-nullops>, +B<-ofilename>, B<-afilename>, B<-->, B<-f>, B<-fcompress-nullops>, B<-fomit-sequence-numbers>, B<-fbypass-nullops>, B<-fstrip-syntax-tree>, B<-On>, B<-D>, B<-Do>, B<-Db>, B<-Da>, B<-DC>, B<-S>, B<-m> +=item EXAMPLES + =item BUGS =item AUTHOR +=back + =head2 B::C - Perl compiler's C backend +=over + =item SYNOPSIS =item DESCRIPTION @@ -3317,7 +4683,7 @@ B<-On>, B<-D>, B<-Do>, B<-Db>, B<-Da>, B<-DC>, B<-S>, B<-m> =item OPTIONS B<-ofilename>, B<-v>, B<-->, B<-uPackname>, B<-D>, B<-Do>, B<-Dc>, B<-DA>, -B<-DC>, B<-DM>, B<-f>, B<-fcog>, B<-fno-cog>, B<-On> +B<-DC>, B<-DM>, B<-f>, B<-fcog>, B<-fno-cog>, B<-On>, B<-llimit> =item EXAMPLES @@ -3325,8 +4691,12 @@ B<-DC>, B<-DM>, B<-f>, B<-fcog>, B<-fno-cog>, B<-On> =item AUTHOR +=back + =head2 B::CC - Perl compiler's optimized C translation backend +=over + =item SYNOPSIS =item DESCRIPTION @@ -3357,38 +4727,69 @@ B<-ffreetmps-each-bblock>, B<-ffreetmps-each-loop>, B<-fomit-taint>, B<-On> =item AUTHOR +=back + =head2 B::Debug - Walk Perl syntax tree, printing debug info about ops +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Deparse - Perl compiler backend to produce perl code +=over + =item SYNOPSIS =item DESCRIPTION =item OPTIONS -B<-p>, B<-u>I<PACKAGE>, B<-l>, B<-s>I<LETTERS>, B<C> +B<-l>, B<-p>, B<-q>, B<-u>I<PACKAGE>, B<-s>I<LETTERS>, B<C>, B<i>I<NUMBER>, +B<T>, B<v>I<STRING>B<.> + +=item USING B::Deparse AS A MODULE + +=over + +=item Synopsis + +=item Description + +=item new + +=item coderef2text + +=back =item BUGS =item AUTHOR +=back + =head2 B::Disassembler - Disassemble Perl bytecode +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Lint - Perl lint +=over + =item SYNOPSIS =item DESCRIPTION @@ -3406,8 +4807,12 @@ B<-u Package> =item AUTHOR +=back + =head2 B::O, O - Generic interface to Perl Compiler backends 
+=over + =item SYNOPSIS =item DESCRIPTION @@ -3418,32 +4823,48 @@ B<-u Package> =item AUTHOR +=back + =head2 B::Showlex - Show lexical variables used in functions or files +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Stackobj - Helper module for CC backend +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Terse - Walk Perl syntax tree, printing terse info about ops +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 B::Xref - Generates cross reference reports for Perl programs +=over + =item SYNOPSIS =item DESCRIPTION @@ -3456,7 +4877,23 @@ C<-oFILENAME>, C<-r>, C<-D[tO]> =item AUTHOR -=head2 Benchmark - benchmark running times of code +=back + +=head2 Bblock, B::Bblock - Walk basic blocks + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item AUTHOR + +=back + +=head2 Benchmark - benchmark running times of Perl code + +=over =item SYNOPSIS @@ -3466,7 +4903,7 @@ C<-oFILENAME>, C<-r>, C<-D[tO]> =item Methods -new, debug +new, debug, iters =item Standard Exports @@ -3476,22 +4913,68 @@ TIMEDIFF, [ STYLE, [ FORMAT ] ] ) =item Optional Exports -clearcache ( COUNT ), clearallcache ( ), disablecache ( ), enablecache ( ) +clearcache ( COUNT ), clearallcache ( ), cmpthese ( COUT, CODEHASHREF, [ +STYLE ] ), cmpthese ( RESULTSHASHREF ), countit(TIME, CODE), disablecache ( +), enablecache ( ), timesum ( T1, T2 ) =back =item NOTES +=item EXAMPLES + =item INHERITANCE =item CAVEATS +=item SEE ALSO + =item AUTHORS =item MODIFICATION HISTORY +=back + +=head2 ByteLoader - load byte compiled perl code + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item AUTHOR + +=item SEE ALSO + +=back + +=head2 Bytecode, B::Bytecode - Perl compiler's bytecode backend + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item OPTIONS + +B<-ofilename>, B<-afilename>, B<-->, B<-f>, B<-fcompress-nullops>, +B<-fomit-sequence-numbers>, B<-fbypass-nullops>, B<-fstrip-syntax-tree>, +B<-On>, B<-D>, B<-Do>, B<-Db>, B<-Da>, B<-DC>, 
B<-S>, B<-m> + +=item EXAMPLES + +=item BUGS + +=item AUTHOR + +=back + =head2 CGI - Simple Common Gateway Interface Class +=over + =item SYNOPSIS =item ABSTRACT @@ -3530,8 +5013,12 @@ clearcache ( COUNT ), clearallcache ( ), disablecache ( ), enablecache ( ) =item DIRECT ACCESS TO THE PARAMETER LIST: +=item FETCHING THE PARAMETER LIST AS A HASH: + =item SAVING THE STATE OF THE SCRIPT TO A FILE: +=item RETRIEVING CGI ERRORS + =item USING THE FUNCTION-ORIENTED INTERFACE B<:cgi>, B<:form>, B<:html2>, B<:html3>, B<:netscape>, B<:html>, @@ -3539,7 +5026,14 @@ B<:standard>, B<:all> =item PRAGMAS --any, -compile, -nph, -autoload, -no_debug, -private_tempfiles +-any, -compile, -nph, -newstyle_urls, -autoload, -no_debug, +-private_tempfiles + +=item SPECIAL FORMS FOR IMPORTING HTML-TAG FUNCTIONS + +1. start_table() (generates a <TABLE> tag), 2. end_table() (generates a +</TABLE> tag), 3. start_ul() (generates a <UL> tag), 4. end_ul() (generates +a </UL> tag) =back @@ -3564,6 +5058,8 @@ B<Parameters:>, 4, 5, 6.. B<-absolute>, B<-relative>, B<-full>, B<-path> (B<-path_info>), B<-query> (B<-query_string>) +=item MIXING POST AND URL PARAMETERS + =back =item CREATING STANDARD HTML ELEMENTS: @@ -3578,6 +5074,8 @@ B<-absolute>, B<-relative>, B<-full>, B<-path> (B<-path_info>), B<-query> =item NON-STANDARD HTML SHORTCUTS +=item PRETTY-PRINTING HTML + =back =item CREATING FILL-OUT FORMS: @@ -3642,12 +5140,12 @@ TOP, BOTTOM or MIDDLE =back -=item NETSCAPE COOKIES +=item HTTP COOKIES 1. an expiration time, 2. a domain, 3. a path, 4. a "secure" flag, B<-name>, B<-value>, B<-path>, B<-domain>, B<-expires>, B<-secure> -=item WORKING WITH NETSCAPE FRAMES +=item WORKING WITH FRAMES 1. Create a <Frameset> document, 2. Specify the destination for the document in the HTTP header, 3. 
Specify the destination for the document in @@ -3665,12 +5163,12 @@ the <FORM> tag =item FETCHING ENVIRONMENT VARIABLES -B<accept()>, B<raw_cookie()>, B<user_agent()>, B<path_info()>, +B<Accept()>, B<raw_cookie()>, B<user_agent()>, B<path_info()>, B<path_translated()>, B<remote_host()>, B<script_name()>Return the script name as a partial URL, for self-refering scripts, B<referer()>, B<auth_type ()>, B<server_name ()>, B<virtual_host ()>, B<server_software ()>, B<remote_user ()>, B<user_name ()>, -B<request_method()> +B<request_method()>, B<content_type()>, B<http()>, B<https()> =item USING NPH SCRIPTS @@ -3679,8 +5177,7 @@ parameters in the B<header()> and B<redirect()> statements: =item Server Push -multipart_init() -multipart_init(-boundary=>$boundary);, multipart_start(), multipart_end() +multipart_init(), multipart_start(), multipart_end() =item Avoiding Denial of Service Attacks @@ -3696,15 +5193,15 @@ basis>, B<2. Globally for all scripts> Matt Heffron (heffron@falstaff.css.beckman.com), James Taylor (james.taylor@srs.gov), Scott Anguish <sanguish@digifix.com>, Mike Jewell (mlj3u@virginia.edu), Timothy Shimmin (tes@kbs.citri.edu.au), Joergen Haegg -(jh@axis.se), Laurent Delfosse (delfosse@csgrad1.cs.wvu.edu), Richard -Resnick (applepi1@aol.com), Craig Bishop (csb@barwonwater.vic.gov.au), Tony -Curtis (tc@vcpc.univie.ac.at), Tim Bunce (Tim.Bunce@ig.co.uk), Tom -Christiansen (tchrist@convex.com), Andreas Koenig -(k@franz.ww.TU-Berlin.DE), Tim MacKenzie (Tim.MacKenzie@fulcrum.com.au), -Kevin B. Hendricks (kbhend@dogwood.tyler.wm.edu), Stephen Dahmen -(joyfire@inxpress.net), Ed Jordan (ed@fidalgo.net), David Alan Pisoni -(david@cnation.com), Doug MacEachern (dougm@opengroup.org), Robin Houston -(robin@oneworld.org), ...and many many more.. 
+(jh@axis.se), Laurent Delfosse (delfosse@delfosse.com), Richard Resnick +(applepi1@aol.com), Craig Bishop (csb@barwonwater.vic.gov.au), Tony Curtis +(tc@vcpc.univie.ac.at), Tim Bunce (Tim.Bunce@ig.co.uk), Tom Christiansen +(tchrist@convex.com), Andreas Koenig (k@franz.ww.TU-Berlin.DE), Tim +MacKenzie (Tim.MacKenzie@fulcrum.com.au), Kevin B. Hendricks +(kbhend@dogwood.tyler.wm.edu), Stephen Dahmen (joyfire@inxpress.net), Ed +Jordan (ed@fidalgo.net), David Alan Pisoni (david@cnation.com), Doug +MacEachern (dougm@opengroup.org), Robin Houston (robin@oneworld.org), +...and many many more.. =item A COMPLETE EXAMPLE OF A SIMPLE FORM-BASED SCRIPT @@ -3712,23 +5209,31 @@ Kevin B. Hendricks (kbhend@dogwood.tyler.wm.edu), Stephen Dahmen =item SEE ALSO -=head2 CGI::Apache - Make things work with CGI.pm against Perl-Apache API +=back + +=head2 CGI::Apache - Backward compatibility module for CGI.pm + +=over =item SYNOPSIS +=item ABSTRACT + =item DESCRIPTION -=item NOTE 1 +=item AUTHOR INFORMATION -=item NOTE 2 +=item BUGS =item SEE ALSO -=item AUTHOR +=back =head2 CGI::Carp, B<CGI::Carp> - CGI routines for writing to the HTTPD (or other) error log +=over + =item SYNOPSIS =item DESCRIPTION @@ -3749,8 +5254,12 @@ other) error log =item SEE ALSO +=back + =head2 CGI::Cookie - Interface to Netscape Cookies +=over + =item SYNOPSIS =item DESCRIPTION @@ -3779,8 +5288,12 @@ B<name()>, B<value()>, B<domain()>, B<path()>, B<expires()> =item SEE ALSO +=back + =head2 CGI::Fast - CGI Interface for Fast CGI +=over + =item SYNOPSIS =item DESCRIPTION @@ -3801,8 +5314,36 @@ B<name()>, B<value()>, B<domain()>, B<path()>, B<expires()> =item SEE ALSO +=back + +=head2 CGI::Pretty - module to produce nicely formatted HTML code + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item Tags that won't be formatted + +=item Customizing the Indenting + +=back + +=item BUGS + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 CGI::Push - Simple Interface to Server Push +=over + =item SYNOPSIS 
=item DESCRIPTION @@ -3821,27 +5362,36 @@ B<name()>, B<value()>, B<domain()>, B<path()>, B<expires()> =item INSTALLING CGI::Push SCRIPTS -=item CAVEATS - =item AUTHOR INFORMATION =item BUGS =item SEE ALSO -=head2 CGI::Switch - Try more than one constructors and return the first -object available +=back + +=head2 CGI::Switch - Backward compatibility module for defunct CGI::Switch + +=over =item SYNOPSIS +=item ABSTRACT + =item DESCRIPTION +=item AUTHOR INFORMATION + +=item BUGS + =item SEE ALSO -=item AUTHOR +=back =head2 CPAN - query, download and build perl modules from CPAN sites +=over + =item SYNOPSIS =item DESCRIPTION @@ -3851,7 +5401,7 @@ object available =item Interactive Mode Searching for authors, bundles, distribution files and modules, make, test, -install, clean modules or distributions, readme, look module or +install, clean modules or distributions, get, readme, look module or distribution, Signals =item CPAN::Shell @@ -3866,7 +5416,7 @@ distribution, Signals expand($type,@things), Programming Examples -=item Methods in the four +=item Methods in the four Classes =item Cache Manager @@ -3878,20 +5428,22 @@ expand($type,@things), Programming Examples =item Debugging -=item Floppy, Zip, and all that Jazz +=item Floppy, Zip, Offline Mode =back =item CONFIGURATION -o conf E<lt>scalar optionE<gt>, o conf E<lt>scalar optionE<gt> -E<lt>valueE<gt>, o conf E<lt>list optionE<gt>, o conf E<lt>list optionE<gt> -[shift|pop], o conf E<lt>list optionE<gt> [unshift|push|splice] -E<lt>listE<gt> +C<o conf E<lt>scalar optionE<gt>>, C<o conf E<lt>scalar optionE<gt> +E<lt>valueE<gt>>, C<o conf E<lt>list optionE<gt>>, C<o conf E<lt>list +optionE<gt> [shift|pop]>, C<o conf E<lt>list optionE<gt> +[unshift|push|splice] E<lt>listE<gt>> =over -=item CD-ROM support +=item Note on urllist parameter's format + +=item urllist parameter has CD-ROM support =back @@ -3899,29 +5451,47 @@ E<lt>listE<gt> =item EXPORT +=item POPULATE AN INSTALLATION WITH LOTS OF MODULES + +=item WORKING 
WITH CPAN.pm BEHIND FIREWALLS + +http firewall, ftp firewall, One way visibility, SOCKS, IP Masquerade + =item BUGS =item AUTHOR =item SEE ALSO +=back + =head2 CPAN::FirstTime - Utility for CPAN::Config file Initialization +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 CPANox, CPAN::Nox - Wrapper around CPAN.pm without using any XS module +=over + =item SYNOPSIS =item DESCRIPTION =item SEE ALSO +=back + =head2 Carp, carp - warn of errors (from perspective of caller) +=over + =item SYNOPSIS =item DESCRIPTION @@ -3932,8 +5502,24 @@ module =back +=item BUGS + +=back + +=head2 Carp::Heavy - Carp guts + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + =head2 Class::Struct - declare struct-like datatypes as Perl classes +=over + =item SYNOPSIS =item DESCRIPTION @@ -3947,22 +5533,337 @@ module Scalar (C<'$'> or C<'*$'>), Array (C<'@'> or C<'*@'>), Hash (C<'%'> or C<'*%'>), Class (C<'Class_Name'> or C<'*Class_Name'>) +=item Initializing with C<new> + =back =item EXAMPLES -Example 1, Example 2 +Example 1, Example 2, Example 3 =item Author and Modification History +=back + +=head2 Config - access Perl configuration information + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +myconfig(), config_sh(), config_vars(@names) + +=item EXAMPLE + +=item WARNING + +=item GLOSSARY + +=over + +=item _ + +C<_a>, C<_exe>, C<_o> + +=item a + +C<afs>, C<alignbytes>, C<ansi2knr>, C<aphostname>, C<api_revision>, +C<api_subversion>, C<api_version>, C<api_versionstring>, C<ar>, C<archlib>, +C<archlibexp>, C<archname64>, C<archname>, C<archobjs>, C<awk> + +=item b + +C<baserev>, C<bash>, C<bin>, C<bincompat5005>, C<binexp>, C<bison>, +C<byacc>, C<byteorder> + +=item c + +C<c>, C<castflags>, C<cat>, C<cc>, C<cccdlflags>, C<ccdlflags>, C<ccflags>, +C<ccsymbols>, C<cf_by>, C<cf_email>, C<cf_time>, C<charsize>, C<chgrp>, +C<chmod>, C<chown>, C<clocktype>, C<comm>, C<compress> + +=item C + +C<CONFIGDOTSH>, C<contains>, C<cp>, C<cpio>, C<cpp>, C<cpp_stuff>, +C<cppccsymbols>, 
C<cppflags>, C<cpplast>, C<cppminus>, C<cpprun>, +C<cppstdin>, C<cppsymbols>, C<crosscompile>, C<cryptlib>, C<csh> + +=item d + +C<d_access>, C<d_accessx>, C<d_alarm>, C<d_archlib>, C<d_atolf>, +C<d_atoll>, C<d_attribut>, C<d_bcmp>, C<d_bcopy>, C<d_bincompat5005>, +C<d_bsd>, C<d_bsdgetpgrp>, C<d_bsdsetpgrp>, C<d_bzero>, C<d_casti32>, +C<d_castneg>, C<d_charvspr>, C<d_chown>, C<d_chroot>, C<d_chsize>, +C<d_closedir>, C<d_const>, C<d_crypt>, C<d_csh>, C<d_cuserid>, +C<d_dbl_dig>, C<d_difftime>, C<d_dirnamlen>, C<d_dlerror>, C<d_dlopen>, +C<d_dlsymun>, C<d_dosuid>, C<d_drand48proto>, C<d_dup2>, C<d_eaccess>, +C<d_endgrent>, C<d_endhent>, C<d_endnent>, C<d_endpent>, C<d_endpwent>, +C<d_endsent>, C<d_endspent>, C<d_eofnblk>, C<d_eunice>, C<d_fchmod>, +C<d_fchown>, C<d_fcntl>, C<d_fd_macros>, C<d_fd_set>, C<d_fds_bits>, +C<d_fgetpos>, C<d_flexfnam>, C<d_flock>, C<d_fork>, C<d_fpathconf>, +C<d_fpos64_t>, C<d_fs_data_s>, C<d_fseeko>, C<d_fsetpos>, C<d_fstatfs>, +C<d_fstatvfs>, C<d_ftello>, C<d_ftime>, C<d_Gconvert>, C<d_getcwd>, +C<d_getfsstat>, C<d_getgrent>, C<d_getgrps>, C<d_gethbyaddr>, +C<d_gethbyname>, C<d_gethent>, C<d_gethname>, C<d_gethostprotos>, +C<d_getlogin>, C<d_getmnt>, C<d_getmntent>, C<d_getnbyaddr>, +C<d_getnbyname>, C<d_getnent>, C<d_getnetprotos>, C<d_getpbyname>, +C<d_getpbynumber>, C<d_getpent>, C<d_getpgid>, C<d_getpgrp2>, C<d_getpgrp>, +C<d_getppid>, C<d_getprior>, C<d_getprotoprotos>, C<d_getpwent>, +C<d_getsbyname>, C<d_getsbyport>, C<d_getsent>, C<d_getservprotos>, +C<d_getspent>, C<d_getspnam>, C<d_gettimeod>, C<d_gnulibc>, C<d_grpasswd>, +C<d_hasmntopt>, C<d_htonl>, C<d_iconv>, C<d_index>, C<d_inetaton>, +C<d_int64_t>, C<d_isascii>, C<d_killpg>, C<d_lchown>, C<d_ldbl_dig>, +C<d_link>, C<d_locconv>, C<d_lockf>, C<d_longdbl>, C<d_longlong>, +C<d_lseekproto>, C<d_lstat>, C<d_madvise>, C<d_mblen>, C<d_mbstowcs>, +C<d_mbtowc>, C<d_memchr>, C<d_memcmp>, C<d_memcpy>, C<d_memmove>, +C<d_memset>, C<d_mkdir>, C<d_mkdtemp>, C<d_mkfifo>, C<d_mkstemp>, 
+C<d_mkstemps>, C<d_mktime>, C<d_mmap>, C<d_mprotect>, C<d_msg>, +C<d_msg_ctrunc>, C<d_msg_dontroute>, C<d_msg_oob>, C<d_msg_peek>, +C<d_msg_proxy>, C<d_msgctl>, C<d_msgget>, C<d_msgrcv>, C<d_msgsnd>, +C<d_msync>, C<d_munmap>, C<d_mymalloc>, C<d_nice>, C<d_nv_preserves_uv>, +C<d_off64_t>, C<d_old_pthread_create_joinable>, C<d_oldpthreads>, +C<d_oldsock>, C<d_open3>, C<d_pathconf>, C<d_pause>, C<d_phostname>, +C<d_pipe>, C<d_poll>, C<d_portable>, C<d_PRId64>, C<d_PRIeldbl>, +C<d_PRIEldbl>, C<d_PRIfldbl>, C<d_PRIFldbl>, C<d_PRIgldbl>, C<d_PRIGldbl>, +C<d_PRIi64>, C<d_PRIo64>, C<d_PRIu64>, C<d_PRIx64>, C<d_PRIX64>, +C<d_pthread_yield>, C<d_pwage>, C<d_pwchange>, C<d_pwclass>, +C<d_pwcomment>, C<d_pwexpire>, C<d_pwgecos>, C<d_pwpasswd>, C<d_pwquota>, +C<d_qgcvt>, C<d_quad>, C<d_readdir>, C<d_readlink>, C<d_rename>, +C<d_rewinddir>, C<d_rmdir>, C<d_safebcpy>, C<d_safemcpy>, C<d_sanemcmp>, +C<d_sched_yield>, C<d_scm_rights>, C<d_seekdir>, C<d_select>, C<d_sem>, +C<d_semctl>, C<d_semctl_semid_ds>, C<d_semctl_semun>, C<d_semget>, +C<d_semop>, C<d_setegid>, C<d_seteuid>, C<d_setgrent>, C<d_setgrps>, +C<d_sethent>, C<d_setlinebuf>, C<d_setlocale>, C<d_setnent>, C<d_setpent>, +C<d_setpgid>, C<d_setpgrp2>, C<d_setpgrp>, C<d_setprior>, C<d_setpwent>, +C<d_setregid>, C<d_setresgid>, C<d_setresuid>, C<d_setreuid>, C<d_setrgid>, +C<d_setruid>, C<d_setsent>, C<d_setsid>, C<d_setspent>, C<d_setvbuf>, +C<d_sfio>, C<d_shm>, C<d_shmat>, C<d_shmatprototype>, C<d_shmctl>, +C<d_shmdt>, C<d_shmget>, C<d_sigaction>, C<d_sigsetjmp>, C<d_socket>, +C<d_socklen_t>, C<d_sockpair>, C<d_sqrtl>, C<d_statblks>, +C<d_statfs_f_flags>, C<d_statfs_s>, C<d_statvfs>, C<d_stdio_cnt_lval>, +C<d_stdio_ptr_lval>, C<d_stdio_stream_array>, C<d_stdiobase>, +C<d_stdstdio>, C<d_strchr>, C<d_strcoll>, C<d_strctcpy>, C<d_strerrm>, +C<d_strerror>, C<d_strtod>, C<d_strtol>, C<d_strtold>, C<d_strtoll>, +C<d_strtoul>, C<d_strtoull>, C<d_strtouq>, C<d_strxfrm>, C<d_suidsafe>, +C<d_symlink>, C<d_syscall>, C<d_sysconf>, 
C<d_sysernlst>, C<d_syserrlst>, +C<d_system>, C<d_tcgetpgrp>, C<d_tcsetpgrp>, C<d_telldir>, +C<d_telldirproto>, C<d_time>, C<d_times>, C<d_truncate>, C<d_tzname>, +C<d_umask>, C<d_uname>, C<d_union_semun>, C<d_ustat>, C<d_vendorarch>, +C<d_vendorbin>, C<d_vendorlib>, C<d_vfork>, C<d_void_closedir>, +C<d_voidsig>, C<d_voidtty>, C<d_volatile>, C<d_vprintf>, C<d_wait4>, +C<d_waitpid>, C<d_wcstombs>, C<d_wctomb>, C<d_xenix>, C<date>, +C<db_hashtype>, C<db_prefixtype>, C<defvoidused>, C<direntrytype>, +C<dlext>, C<dlsrc>, C<doublesize>, C<drand01>, C<dynamic_ext> + +=item e + +C<eagain>, C<ebcdic>, C<echo>, C<egrep>, C<emacs>, C<eunicefix>, +C<exe_ext>, C<expr>, C<extensions> + +=item f + +C<fflushall>, C<fflushNULL>, C<find>, C<firstmakefile>, C<flex>, +C<fpossize>, C<fpostype>, C<freetype>, C<full_ar>, C<full_csh>, C<full_sed> + +=item g + +C<gccversion>, C<gidformat>, C<gidsign>, C<gidsize>, C<gidtype>, +C<glibpth>, C<grep>, C<groupcat>, C<groupstype>, C<gzip> + +=item h + +C<h_fcntl>, C<h_sysfile>, C<hint>, C<hostcat>, C<huge> + +=item i + +C<i16size>, C<i16type>, C<i32size>, C<i32type>, C<i64size>, C<i64type>, +C<i8size>, C<i8type>, C<i_arpainet>, C<i_bsdioctl>, C<i_db>, C<i_dbm>, +C<i_dirent>, C<i_dld>, C<i_dlfcn>, C<i_fcntl>, C<i_float>, C<i_gdbm>, +C<i_grp>, C<i_iconv>, C<i_ieeefp>, C<i_inttypes>, C<i_limits>, C<i_locale>, +C<i_machcthr>, C<i_malloc>, C<i_math>, C<i_memory>, C<i_mntent>, C<i_ndbm>, +C<i_netdb>, C<i_neterrno>, C<i_netinettcp>, C<i_niin>, C<i_poll>, +C<i_pthread>, C<i_pwd>, C<i_rpcsvcdbm>, C<i_sfio>, C<i_sgtty>, C<i_shadow>, +C<i_socks>, C<i_stdarg>, C<i_stddef>, C<i_stdlib>, C<i_string>, +C<i_sunmath>, C<i_sysaccess>, C<i_sysdir>, C<i_sysfile>, C<i_sysfilio>, +C<i_sysin>, C<i_sysioctl>, C<i_syslog>, C<i_sysmman>, C<i_sysmode>, +C<i_sysmount>, C<i_sysndir>, C<i_sysparam>, C<i_sysresrc>, C<i_syssecrt>, +C<i_sysselct>, C<i_syssockio>, C<i_sysstat>, C<i_sysstatfs>, +C<i_sysstatvfs>, C<i_systime>, C<i_systimek>, C<i_systimes>, C<i_systypes>, 
+C<i_sysuio>, C<i_sysun>, C<i_sysutsname>, C<i_sysvfs>, C<i_syswait>, +C<i_termio>, C<i_termios>, C<i_time>, C<i_unistd>, C<i_ustat>, C<i_utime>, +C<i_values>, C<i_varargs>, C<i_varhdr>, C<i_vfork>, +C<ignore_versioned_solibs>, C<inc_version_list>, C<inc_version_list_init>, +C<incpath>, C<inews>, C<installarchlib>, C<installbin>, C<installman1dir>, +C<installman3dir>, C<installprefix>, C<installprefixexp>, +C<installprivlib>, C<installscript>, C<installsitearch>, C<installsitebin>, +C<installsitelib>, C<installstyle>, C<installusrbinperl>, +C<installvendorarch>, C<installvendorbin>, C<installvendorlib>, C<intsize>, +C<ivdformat>, C<ivsize>, C<ivtype> + +=item k + +C<known_extensions>, C<ksh> + +=item l + +C<large>, C<ld>, C<lddlflags>, C<ldflags>, C<ldlibpthname>, C<less>, +C<lib_ext>, C<libc>, C<libperl>, C<libpth>, C<libs>, C<libsdirs>, +C<libsfiles>, C<libsfound>, C<libspath>, C<libswanted>, C<line>, C<lint>, +C<lkflags>, C<ln>, C<lns>, C<locincpth>, C<loclibpth>, C<longdblsize>, +C<longlongsize>, C<longsize>, C<lp>, C<lpr>, C<ls>, C<lseeksize>, +C<lseektype> + +=item m + +C<mail>, C<mailx>, C<make>, C<make_set_make>, C<mallocobj>, C<mallocsrc>, +C<malloctype>, C<man1dir>, C<man1direxp>, C<man1ext>, C<man3dir>, +C<man3direxp>, C<man3ext> + +=item M + +C<Mcc>, C<medium>, C<mips_type>, C<mkdir>, C<mmaptype>, C<models>, +C<modetype>, C<more>, C<multiarch>, C<mv>, C<myarchname>, C<mydomain>, +C<myhostname>, C<myuname> + +=item n + +C<n>, C<netdb_hlen_type>, C<netdb_host_type>, C<netdb_name_type>, +C<netdb_net_type>, C<nm>, C<nm_opt>, C<nm_so_opt>, C<nonxs_ext>, C<nroff>, +C<nvsize>, C<nvtype> + +=item o + +C<o_nonblock>, C<obj_ext>, C<old_pthread_create_joinable>, C<optimize>, +C<orderlib>, C<osname>, C<osvers> + +=item p + +C<package>, C<pager>, C<passcat>, C<patchlevel>, C<path_sep>, C<perl5>, +C<perl> + +=item P + +C<PERL_REVISION>, C<PERL_SUBVERSION>, C<PERL_VERSION>, C<perladmin>, +C<perlpath>, C<pg>, C<phostname>, C<pidtype>, C<plibpth>, C<pm_apiversion>, 
+C<pmake>, C<pr>, C<prefix>, C<prefixexp>, C<privlib>, C<privlibexp>, +C<prototype>, C<ptrsize> + +=item q + +C<quadkind>, C<quadtype> + +=item r + +C<randbits>, C<randfunc>, C<randseedtype>, C<ranlib>, C<rd_nodata>, +C<revision>, C<rm>, C<rmail>, C<runnm> + +=item s + +C<sched_yield>, C<scriptdir>, C<scriptdirexp>, C<sed>, C<seedfunc>, +C<selectminbits>, C<selecttype>, C<sendmail>, C<sh>, C<shar>, C<sharpbang>, +C<shmattype>, C<shortsize>, C<shrpenv>, C<shsharp>, C<sig_count>, +C<sig_name>, C<sig_name_init>, C<sig_num>, C<sig_num_init>, C<signal_t>, +C<sitearch>, C<sitearchexp>, C<sitebin>, C<sitebinexp>, C<sitelib>, +C<sitelib_stem>, C<sitelibexp>, C<siteprefix>, C<siteprefixexp>, +C<sizesize>, C<sizetype>, C<sleep>, C<smail>, C<small>, C<so>, +C<sockethdr>, C<socketlib>, C<socksizetype>, C<sort>, C<spackage>, +C<spitshell>, C<split>, C<sPRId64>, C<sPRIeldbl>, C<sPRIEldbl>, +C<sPRIfldbl>, C<sPRIFldbl>, C<sPRIgldbl>, C<sPRIGldbl>, C<sPRIi64>, +C<sPRIo64>, C<sPRIu64>, C<sPRIx64>, C<sPRIX64>, C<src>, C<ssizetype>, +C<startperl>, C<startsh>, C<static_ext>, C<stdchar>, C<stdio_base>, +C<stdio_bufsiz>, C<stdio_cnt>, C<stdio_filbuf>, C<stdio_ptr>, +C<stdio_stream_array>, C<strings>, C<submit>, C<subversion>, C<sysman> + +=item t + +C<tail>, C<tar>, C<tbl>, C<tee>, C<test>, C<timeincl>, C<timetype>, +C<touch>, C<tr>, C<trnl>, C<troff> + +=item u + +C<u16size>, C<u16type>, C<u32size>, C<u32type>, C<u64size>, C<u64type>, +C<u8size>, C<u8type>, C<uidformat>, C<uidsign>, C<uidsize>, C<uidtype>, +C<uname>, C<uniq>, C<uquadtype>, C<use5005threads>, C<use64bitall>, +C<use64bitint>, C<usedl>, C<useithreads>, C<uselargefiles>, +C<uselongdouble>, C<usemorebits>, C<usemultiplicity>, C<usemymalloc>, +C<usenm>, C<useopcode>, C<useperlio>, C<useposix>, C<usesfio>, +C<useshrplib>, C<usesocks>, C<usethreads>, C<usevendorprefix>, C<usevfork>, +C<usrinc>, C<uuname>, C<uvoformat>, C<uvsize>, C<uvtype>, C<uvuformat>, +C<uvxformat> + +=item v + +C<vendorarch>, C<vendorarchexp>, C<vendorbin>, 
C<vendorbinexp>, +C<vendorlib>, C<vendorlib_stem>, C<vendorlibexp>, C<vendorprefix>, +C<vendorprefixexp>, C<version>, C<vi>, C<voidflags> + +=item x + +C<xlibpth>, C<xs_apiversion> + +=item z + +C<zcat>, C<zip> + +=back + +=item NOTE + +=back + =head2 Cwd, getcwd - get pathname of current working directory +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 DB - programmatic interface to the Perl debugging API (draft, +subject to +change) + +=over + =item SYNOPSIS =item DESCRIPTION +=over + +=item Global Variables + + $DB::sub, %DB::sub, $DB::single, $DB::signal, $DB::trace, @DB::args, +@DB::dbline, %DB::dbline, $DB::package, $DB::filename, $DB::subname, +$DB::lineno + +=item API Methods + +CLIENT->register(), CLIENT->evalcode(STRING), CLIENT->skippkg('D::hide'), +CLIENT->run(), CLIENT->step(), CLIENT->next(), CLIENT->done() + +=item Client Callback Methods + +CLIENT->init(), CLIENT->prestop([STRING]), CLIENT->stop(), CLIENT->idle(), +CLIENT->poststop([STRING]), CLIENT->evalcode(STRING), CLIENT->cleanup(), +CLIENT->output(LIST) + +=back + +=item BUGS + +=item AUTHOR + +=back + =head2 DB_File - Perl5 access to Berkeley DB version 1.x +=over + =item SYNOPSIS =item DESCRIPTION @@ -3971,7 +5872,7 @@ B<DB_HASH>, B<DB_BTREE>, B<DB_RECNO> =over -=item Using DB_File with Berkeley DB version 2 +=item Using DB_File with Berkeley DB version 2 or 3 =item Interface to Berkeley DB @@ -4001,6 +5902,10 @@ B<DB_HASH>, B<DB_BTREE>, B<DB_RECNO> =item The get_dup() Method +=item The find_dup() Method + +=item The del_dup() Method + =item Matching Partial Keys =back @@ -4013,7 +5918,7 @@ B<DB_HASH>, B<DB_BTREE>, B<DB_RECNO> =item A Simple Example -=item Extra Methods +=item Extra RECNO Methods B<$X-E<gt>push(list) ;>, B<$value = $X-E<gt>pop ;>, B<$X-E<gt>shift>, B<$X-E<gt>unshift(list) ;>, B<$X-E<gt>length> @@ -4029,11 +5934,30 @@ $X-E<gt>put($key, $value [, $flags]) ;>, B<$status = $X-E<gt>del($key [, $flags]) ;>, B<$status = $X-E<gt>fd ;>, B<$status = $X-E<gt>seq($key, 
$value, $flags) ;>, B<$status = $X-E<gt>sync([$flags]) ;> +=item DBM FILTERS + +B<filter_store_key>, B<filter_store_value>, B<filter_fetch_key>, +B<filter_fetch_value> + +=over + +=item The Filter + +=item An Example -- the NULL termination problem. + +=item Another Example -- Key is a C int. + +=back + =item HINTS AND TIPS =over -=item Locking Databases +=item Locking: The Trouble with fd + +=item Safe ways to lock a database + +B<Tie::DB_Lock>, B<Tie::DB_LockFile>, B<DB_File::Lock> =item Sharing Databases With C Applications @@ -4055,6 +5979,8 @@ $value, $flags) ;>, B<$status = $X-E<gt>sync([$flags]) ;> =back +=item REFERENCES + =item HISTORY =item BUGS @@ -4067,9 +5993,13 @@ $value, $flags) ;>, B<$status = $X-E<gt>sync([$flags]) ;> =item AUTHOR +=back + =head2 Data::Dumper - stringified perl data structures, suitable for both printing and C<eval> +=over + =item SYNOPSIS =item DESCRIPTION @@ -4078,15 +6008,14 @@ printing and C<eval> =item Methods -I<PACKAGE>->new(I<ARRAYREF [>, I<ARRAYREF]>), I<$OBJ>->Dump I<or> -I<PACKAGE>->Dump(I<ARRAYREF [>, I<ARRAYREF]>), I<$OBJ>->Dumpxs I<or> -I<PACKAGE>->Dumpxs(I<ARRAYREF [>, I<ARRAYREF]>), -I<$OBJ>->Seen(I<[HASHREF]>), I<$OBJ>->Values(I<[ARRAYREF]>), -I<$OBJ>->Names(I<[ARRAYREF]>), I<$OBJ>->Reset +I<PACKAGE>->new(I<ARRAYREF [>, I<ARRAYREF]>), I<$OBJ>->Dump I<or> +I<PACKAGE>->Dump(I<ARRAYREF [>, I<ARRAYREF]>), I<$OBJ>->Seen(I<[HASHREF]>), +I<$OBJ>->Values(I<[ARRAYREF]>), I<$OBJ>->Names(I<[ARRAYREF]>), +I<$OBJ>->Reset =item Functions -Dumper(I<LIST>), DumperX(I<LIST>) +Dumper(I<LIST>) =item Configuration Variables or Methods @@ -4100,7 +6029,8 @@ $Data::Dumper::Freezer I<or> $I<OBJ>->Freezer(I<[NEWVAL]>), $Data::Dumper::Toaster I<or> $I<OBJ>->Toaster(I<[NEWVAL]>), $Data::Dumper::Deepcopy I<or> $I<OBJ>->Deepcopy(I<[NEWVAL]>), $Data::Dumper::Quotekeys I<or> $I<OBJ>->Quotekeys(I<[NEWVAL]>), -$Data::Dumper::Bless I<or> $I<OBJ>->Bless(I<[NEWVAL]>) +$Data::Dumper::Bless I<or> $I<OBJ>->Bless(I<[NEWVAL]>), +$Data::Dumper::Maxdepth 
I<or> $I<OBJ>->Maxdepth(I<[NEWVAL]>) =item Exports @@ -4118,41 +6048,203 @@ Dumper =item SEE ALSO +=back + +=head2 Devel::DProf - a Perl code profiler + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item PROFILE FORMAT + +=item AUTOLOAD + +=item ENVIRONMENT + +=item BUGS + +=item SEE ALSO + +=back + +=head2 Devel::Peek - A data debugging tool for the XS programmer + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item EXAMPLES + +=over + +=item A simple scalar string + +=item A simple scalar number + +=item A simple scalar with an extra reference + +=item A reference to a simple scalar + +=item A reference to an array + +=item A reference to a hash + +=item Dumping a large array or hash + +=item A reference to an SV which holds a C pointer + +=item A reference to a subroutine + +=back + +=item EXPORTS + +=item BUGS + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 Devel::SelfStubber - generate stubs for a SelfLoading module +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 DirHandle - supply object methods for directory handles +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 Dumpvalue - provides screen dump of Perl data. 
+ +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item Creation + +C<arrayDepth>, C<hashDepth>, C<compactDump>, C<veryCompact>, C<globPrint>, +C<DumpDBFiles>, C<DumpPackages>, C<DumpReused>, C<tick>, C<HighBit>, +C<printUndef>, C<UsageOnly>, unctrl, subdump, bareStringify, quoteHighBit, +stopDbSignal + +=item Methods + +dumpValue, dumpValues, dumpvars, set_quote, set_unctrl, compactDump, +veryCompact, set, get + +=back + +=back + +=head2 DynaLoader - Dynamically load C libraries into Perl code + +=over + =item SYNOPSIS =item DESCRIPTION +@dl_library_path, @dl_resolve_using, @dl_require_symbols, @dl_librefs, +@dl_modules, dl_error(), $dl_debug, dl_findfile(), dl_expandspec(), +dl_load_file(), dl_unload_file(), dl_loadflags(), dl_find_symbol(), +dl_find_symbol_anywhere(), dl_undef_symbols(), dl_install_xsub(), +bootstrap() + +=item AUTHOR + +=back + +=head2 DynaLoader::XSLoader, XSLoader - Dynamically load C libraries into +Perl code + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item AUTHOR + +=back + =head2 English - use nice English (or awk) names for ugly punctuation variables +=over + =item SYNOPSIS =item DESCRIPTION -=head2 Env - perl module that imports environment variables +=item BUGS + +=back + +=head2 Env - perl module that imports environment variables as scalars or +arrays + +=over =item SYNOPSIS =item DESCRIPTION +=item LIMITATIONS + =item AUTHOR +=back + +=head2 Errno - System errno constants + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CAVEATS + +=item AUTHOR + +=item COPYRIGHT + +=back + =head2 Exporter - Implements default import method for modules +=over + =item SYNOPSIS =item DESCRIPTION =over +=item How to Export + =item Selecting What To Export =item Specialised Import Lists @@ -4167,16 +6259,50 @@ variables =back +=back + +=head2 Exporter::Heavy - Exporter guts + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + =head2 ExtUtils::Command - utilities to replace common UNIX commands in Makefiles etc. 
+=over + =item SYNOPSIS =item DESCRIPTION -cat, eqtime src dst, rm_f files..., rm_f files..., touch files .., mv -source... destination, cp source... destination, chmod mode files.., mkpath -directory.., test_f file +=back + +cat + +eqtime src dst + +rm_f files... + +rm_f files... + +touch files .. + +mv source... destination + +cp source... destination + +chmod mode files.. + +mkpath directory.. + +test_f file + +=over =item BUGS @@ -4184,8 +6310,12 @@ directory.., test_f file =item AUTHOR +=back + =head2 ExtUtils::Embed - Utilities for embedding Perl in C/C++ applications +=over + =item SYNOPSIS =item DESCRIPTION @@ -4203,14 +6333,22 @@ ccopts(), xsi_header(), xsi_protos(@modules), xsi_body(@modules) =item AUTHOR +=back + =head2 ExtUtils::Install - install files from here to there +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 ExtUtils::Installed - Inventory management of installed modules +=over + =item SYNOPSIS =item DESCRIPTION @@ -4226,8 +6364,12 @@ packlist(), version() =item AUTHOR +=back + =head2 ExtUtils::Liblist - determine libraries to use and how to use them +=over + =item SYNOPSIS =item DESCRIPTION @@ -4256,15 +6398,36 @@ For static extensions, For dynamic extensions, For dynamic extensions =item SEE ALSO +=back + +=head2 ExtUtils::MM_Cygwin - methods to override UN*X behaviour in +ExtUtils::MakeMaker + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +canonpath, cflags, manifypods, perl_archive + +=back + =head2 ExtUtils::MM_OS2 - methods to override UN*X behaviour in ExtUtils::MakeMaker +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 ExtUtils::MM_Unix - methods used by ExtUtils::MakeMaker +=over + =item SYNOPSIS =item DESCRIPTION @@ -4275,37 +6438,203 @@ ExtUtils::MakeMaker =item Preloaded methods -canonpath, catdir, catfile, curdir, rootdir, updir +canonpath + +=back + +=back + +catdir + +catfile + +curdir + +rootdir + +updir + +=over =item SelfLoaded methods -c_o (o), cflags (o), clean (o), const_cccmd (o), const_config (o), 
-const_loadlibs (o), constants (o), depend (o), dir_target (o), dist (o), -dist_basics (o), dist_ci (o), dist_core (o), dist_dir (o), dist_test (o), -dlsyms (o), dynamic (o), dynamic_bs (o), dynamic_lib (o), exescan, -extliblist, file_name_is_absolute, find_perl +c_o (o) + +=back + +cflags (o) + +clean (o) + +const_cccmd (o) + +const_config (o) + +const_loadlibs (o) + +constants (o) + +depend (o) + +dir_target (o) + +dist (o) + +dist_basics (o) + +dist_ci (o) + +dist_core (o) + +dist_dir (o) + +dist_test (o) + +dlsyms (o) + +dynamic (o) + +dynamic_bs (o) + +dynamic_lib (o) + +exescan + +extliblist + +file_name_is_absolute + +find_perl + +=over =item Methods to actually produce chunks of text for the Makefile -fixin, force (o), guess_name, has_link_code, init_dirscan, init_main, -init_others, install (o), installbin (o), libscan (o), linkext (o), lsdir, -macro (o), makeaperl (o), makefile (o), manifypods (o), maybe_command, -maybe_command_in_dirs, needs_linking (o), nicetext, parse_version, -parse_abstract, pasthru (o), path, perl_script, perldepend (o), ppd, -perm_rw (o), perm_rwx (o), pm_to_blib, post_constants (o), post_initialize -(o), postamble (o), prefixify, processPL (o), realclean (o), -replace_manpage_separator, static (o), static_lib (o), staticmake (o), -subdir_x (o), subdirs (o), test (o), test_via_harness (o), test_via_script -(o), tool_autosplit (o), tools_other (o), tool_xsubpp (o), top_targets (o), -writedoc, xs_c (o), xs_o (o), perl_archive, export_list +fixin =back +force (o) + +guess_name + +has_link_code + +htmlifypods (o) + +init_dirscan + +init_main + +init_others + +install (o) + +installbin (o) + +libscan (o) + +linkext (o) + +lsdir + +macro (o) + +makeaperl (o) + +makefile (o) + +manifypods (o) + +maybe_command + +maybe_command_in_dirs + +needs_linking (o) + +nicetext + +parse_version + +parse_abstract + +pasthru (o) + +path + +perl_script + +perldepend (o) + +ppd + +perm_rw (o) + +perm_rwx (o) + +pm_to_blib + +post_constants (o) + 
+post_initialize (o) + +postamble (o) + +prefixify + +processPL (o) + +realclean (o) + +replace_manpage_separator + +static (o) + +static_lib (o) + +staticmake (o) + +subdir_x (o) + +subdirs (o) + +test (o) + +test_via_harness (o) + +test_via_script (o) + +tool_autosplit (o) + +tools_other (o) + +tool_xsubpp (o) + +top_targets (o) + +writedoc + +xs_c (o) + +xs_cpp (o) + +xs_o (o) + +perl_archive + +export_list + +=over + =item SEE ALSO +=back + =head2 ExtUtils::MM_VMS - methods to override UN*X behaviour in ExtUtils::MakeMaker +=over + =item SYNOPSIS =item DESCRIPTION @@ -4314,43 +6643,159 @@ ExtUtils::MakeMaker =item Methods always loaded -eliminate_macros, fixpath, catdir, catfile, wraplist, curdir (override), -rootdir (override), updir (override) +wraplist + +=back + +=back + +rootdir (override) + +=over =item SelfLoaded methods -guess_name (override), find_perl (override), path (override), maybe_command -(override), maybe_command_in_dirs (override), perl_script (override), -file_name_is_absolute (override), replace_manpage_separator, init_others -(override), constants (override), cflags (override), const_cccmd -(override), pm_to_blib (override), tool_autosplit (override), tool_sxubpp -(override), xsubpp_version (override), tools_other (override), dist -(override), c_o (override), xs_c (override), xs_o (override), top_targets -(override), dlsyms (override), dynamic_lib (override), dynamic_bs -(override), static_lib (override), manifypods (override), processPL -(override), installbin (override), subdir_x (override), clean (override), -realclean (override), dist_basics (override), dist_core (override), -dist_dir (override), dist_test (override), install (override), perldepend -(override), makefile (override), test (override), test_via_harness -(override), test_via_script (override), makeaperl (override), nicetext -(override) +guess_name (override) =back +find_perl (override) + +path (override) + +maybe_command (override) + +maybe_command_in_dirs (override) + 
+perl_script (override) + +file_name_is_absolute (override) + +replace_manpage_separator + +init_others (override) + +constants (override) + +cflags (override) + +const_cccmd (override) + +pm_to_blib (override) + +tool_autosplit (override) + +tool_xsubpp (override) + +xsubpp_version (override) + +tools_other (override) + +dist (override) + +c_o (override) + +xs_c (override) + +xs_o (override) + +top_targets (override) + +dlsyms (override) + +dynamic_lib (override) + +dynamic_bs (override) + +static_lib (override) + +manifypods (override) + +processPL (override) + +installbin (override) + +subdir_x (override) + +clean (override) + +realclean (override) + +dist_basics (override) + +dist_core (override) + +dist_dir (override) + +dist_test (override) + +install (override) + +perldepend (override) + +makefile (override) + +test (override) + +test_via_harness (override) + +test_via_script (override) + +makeaperl (override) + +nicetext (override) + =head2 ExtUtils::MM_Win32 - methods to override UN*X behaviour in ExtUtils::MakeMaker +=over + =item SYNOPSIS =item DESCRIPTION -catfile, constants (o), static_lib (o), dynamic_bs (o), dynamic_lib (o), -canonpath, perl_script, pm_to_blib, test_via_harness (o), tool_autosplit -(override), tools_other (o), xs_o (o), top_targets (o), manifypods (o), -dist_ci (o), dist_core (o), pasthru (o) +=back + +catfile + +constants (o) + +static_lib (o) + +dynamic_bs (o) + +dynamic_lib (o) + +canonpath + +perl_script + +pm_to_blib + +test_via_harness (o) + +tool_autosplit (override) + +tools_other (o) + +xs_o (o) + +top_targets (o) + +htmlifypods (o) + +manifypods (o) + +dist_ci (o) + +dist_core (o) + +pasthru (o) =head2 ExtUtils::MakeMaker - create an extension Makefile +=over + =item SYNOPSIS =item DESCRIPTION @@ -4379,21 +6824,25 @@ dist_ci (o), dist_core (o), pasthru (o) =item Using Attributes and Parameters -C, CCFLAGS, CONFIG, CONFIGURE, DEFINE, DIR, DISTNAME, DL_FUNCS, DL_VARS, -EXCLUDE_EXT, EXE_FILES, NO_VC, FIRST_MAKEFILE, FULLPERL, 
H, IMPORTS, INC, -INCLUDE_EXT, INSTALLARCHLIB, INSTALLBIN, INSTALLDIRS, INSTALLMAN1DIR, -INSTALLMAN3DIR, INSTALLPRIVLIB, INSTALLSCRIPT, INSTALLSITELIB, -INSTALLSITEARCH, INST_ARCHLIB, INST_BIN, INST_EXE, INST_LIB, INST_MAN1DIR, -INST_MAN3DIR, INST_SCRIPT, LDFROM, LIBPERL_A, LIB, LIBS, LINKTYPE, -MAKEAPERL, MAKEFILE, MAN1PODS, MAN3PODS, MAP_TARGET, MYEXTLIB, NAME, -NEEDS_LINKING, NOECHO, NORECURS, OBJECT, OPTIMIZE, PERL, PERLMAINCC, -PERL_ARCHLIB, PERL_LIB, PERL_SRC, PERM_RW, PERM_RWX, PL_FILES, PM, -PMLIBDIRS, PREFIX, PREREQ_PM, SKIP, TYPEMAPS, VERSION, VERSION_FROM, XS, -XSOPT, XSPROTOARG, XS_VERSION +AUTHOR, ABSTRACT, ABSTRACT_FROM, BINARY_LOCATION, C, CAPI, CCFLAGS, CONFIG, +CONFIGURE, DEFINE, DIR, DISTNAME, DL_FUNCS, DL_VARS, EXCLUDE_EXT, +EXE_FILES, FIRST_MAKEFILE, FULLPERL, FUNCLIST, H, HTMLLIBPODS, +HTMLSCRIPTPODS, IMPORTS, INC, INCLUDE_EXT, INSTALLARCHLIB, INSTALLBIN, +INSTALLDIRS, INSTALLHTMLPRIVLIBDIR, INSTALLHTMLSCRIPTDIR, +INSTALLHTMLSITELIBDIR, INSTALLMAN1DIR, INSTALLMAN3DIR, INSTALLPRIVLIB, +INSTALLSCRIPT, INSTALLSITEARCH, INSTALLSITELIB, INST_ARCHLIB, INST_BIN, +INST_EXE, INST_LIB, INST_HTMLLIBDIR, INST_HTMLSCRIPTDIR, INST_MAN1DIR, +INST_MAN3DIR, INST_SCRIPT, PERL_MALLOC_OK, LDFROM, LIB, LIBPERL_A, LIBS, +LINKTYPE, MAKEAPERL, MAKEFILE, MAN1PODS, MAN3PODS, MAP_TARGET, MYEXTLIB, +NAME, NEEDS_LINKING, NOECHO, NORECURS, NO_VC, OBJECT, OPTIMIZE, PERL, +PERLMAINCC, PERL_ARCHLIB, PERL_LIB, PERL_SRC, PERM_RW, PERM_RWX, PL_FILES, +PM, PMLIBDIRS, POLLUTE, PPM_INSTALL_EXEC, PPM_INSTALL_SCRIPT, PREFIX, +PREREQ_PM, SKIP, TYPEMAPS, VERSION, VERSION_FROM, XS, XSOPT, XSPROTOARG, +XS_VERSION =item Additional lowercase attributes -clean, depend, dist, dynamic_lib, installpm, linkext, macro, realclean, +clean, depend, dist, dynamic_lib, linkext, macro, realclean, test, tool_autosplit =item Overriding MakeMaker Methods @@ -4402,20 +6851,28 @@ tool_autosplit =item Distribution Support -make distcheck, make skipcheck, make distclean, make manifest, -make distdir, make 
tardist, make dist, make uutardist, make + make distcheck, make skipcheck, make distclean, make manifest, + make distdir, make tardist, make dist, make uutardist, make shdist, make zipdist, make ci =item Disabling an extension =back +=item ENVIRONMENT + +PERL_MM_OPT + =item SEE ALSO =item AUTHORS +=back + =head2 ExtUtils::Manifest - utilities to write and check a MANIFEST file +=over + =item SYNOPSIS =item DESCRIPTION @@ -4435,27 +6892,51 @@ C<Added to MANIFEST:> I<file> =item AUTHOR +=back + +=head2 ExtUtils::Miniperl, writemain - write the C code for perlmain.c + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item SEE ALSO + +=back + =head2 ExtUtils::Mkbootstrap - make a bootstrap file for use by DynaLoader +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 ExtUtils::Mksymlists - write linker options files for dynamic extension +=over + =item SYNOPSIS =item DESCRIPTION -NAME, DL_FUNCS, DL_VARS, FILE, FUNCLIST, DLBASE +DLBASE, DL_FUNCS, DL_VARS, FILE, FUNCLIST, IMPORTS, NAME =item AUTHOR =item REVISION +=back + =head2 ExtUtils::Packlist - manage .packlist files +=over + =item SYNOPSIS =item DESCRIPTION @@ -4470,22 +6951,34 @@ new(), read(), write(), validate(), packlist_file() =item AUTHOR +=back + =head2 ExtUtils::testlib - add blib/* directories to @INC +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 Fatal - replace functions with equivalents which succeed or die +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 Fcntl - load the C Fcntl.h defines +=over + =item SYNOPSIS =item DESCRIPTION @@ -4494,8 +6987,12 @@ new(), read(), write(), validate(), packlist_file() =item EXPORTED SYMBOLS +=back + =head2 File::Basename, fileparse - split a pathname into pieces +=over + =item SYNOPSIS =item DESCRIPTION @@ -4506,14 +7003,22 @@ fileparse_set_fstype, fileparse C<basename>, C<dirname> +=back + =head2 File::CheckTree, validate - run many filetest checks on a tree +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 
File::Compare - Compare files or filehandles +=over + =item SYNOPSIS =item DESCRIPTION @@ -4522,15 +7027,19 @@ C<basename>, C<dirname> =item AUTHOR +=back + =head2 File::Copy - Copy files or filehandles +=over + =item SYNOPSIS =item DESCRIPTION =over -=item Special behavior if C<syscopy> is defined (VMS and OS/2) +=item Special behaviour if C<syscopy> is defined (OS/2, VMS and Win32) rmscopy($from,$to[,$date_flag]) @@ -4540,8 +7049,12 @@ rmscopy($from,$to[,$date_flag]) =item AUTHOR +=back + =head2 File::DosGlob - DOS like globbing and then some +=over + =item SYNOPSIS =item DESCRIPTION @@ -4556,15 +7069,47 @@ rmscopy($from,$to[,$date_flag]) =item SEE ALSO +=back + =head2 File::Find, find - traverse a file tree +=over + =item SYNOPSIS =item DESCRIPTION -=item BUGS +C<wanted>, C<bydepth>, C<follow>, C<follow_fast>, C<follow_skip>, +C<no_chdir>, C<untaint>, C<untaint_pattern>, C<untaint_skip> + +=item CAVEAT + +=back -=head2 File::Path - create or remove a series of directories +=head2 File::Glob - Perl extension for BSD glob routine + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +C<GLOB_ERR>, C<GLOB_MARK>, C<GLOB_NOCASE>, C<GLOB_NOCHECK>, C<GLOB_NOSORT>, +C<GLOB_BRACE>, C<GLOB_NOMAGIC>, C<GLOB_QUOTE>, C<GLOB_TILDE>, C<GLOB_CSH> + +=item DIAGNOSTICS + +C<GLOB_NOSPACE>, C<GLOB_ABEND> + +=item NOTES + +=item AUTHOR + +=back + +=head2 File::Path - create or remove directory trees + +=over =item SYNOPSIS @@ -4572,10 +7117,12 @@ rmscopy($from,$to[,$date_flag]) =item AUTHORS -=item REVISION +=back =head2 File::Spec - portably perform operations on file names +=over + =item SYNOPSIS =item DESCRIPTION @@ -4584,63 +7131,234 @@ rmscopy($from,$to[,$date_flag]) =item AUTHORS +=back + +=head2 File::Spec::Functions - portably perform operations on file names + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item Exports + +=back + +=item SEE ALSO + +=back + =head2 File::Spec::Mac - File::Spec for MacOS +=over + =item SYNOPSIS =item DESCRIPTION =item METHODS 
-canonpath, catdir, catfile, curdir, rootdir, updir, file_name_is_absolute, +canonpath + +=back + +catdir + +catfile + +curdir + +devnull + +rootdir + +tmpdir + +updir + +file_name_is_absolute + path +splitpath + +splitdir + +catpath + +abs2rel + +rel2abs + +=over + =item SEE ALSO +=back + =head2 File::Spec::OS2 - methods for OS/2 file specs +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 File::Spec::Unix - methods used by File::Spec +=over + =item SYNOPSIS =item DESCRIPTION =item METHODS -canonpath, catdir, catfile, curdir, rootdir, updir, no_upwards, -file_name_is_absolute, path, join, nativename +canonpath + +=back + +catdir + +catfile + +curdir + +devnull + +rootdir + +tmpdir + +updir + +no_upwards + +case_tolerant + +file_name_is_absolute + +path + +join + +splitpath + +splitdir + +catpath + +abs2rel + +rel2abs + +=over =item SEE ALSO +=back + =head2 File::Spec::VMS - methods for VMS file specs +=over + =item SYNOPSIS =item DESCRIPTION +eliminate_macros + +=back + +fixpath + =over =item Methods always loaded -catdir, catfile, curdir (override), rootdir (override), updir (override), -path (override), file_name_is_absolute (override) +canonpath (override) + +=back + +catdir + +catfile + +curdir (override) + +devnull (override) + +rootdir (override) + +tmpdir (override) + +updir (override) + +case_tolerant (override) + +path (override) + +file_name_is_absolute (override) + +splitpath (override) + +splitdir (override) + +catpath (override) + +abs2rel (override) + +rel2abs (override) + +=over + +=item SEE ALSO =back =head2 File::Spec::Win32 - methods for Win32 file specs +=over + =item SYNOPSIS =item DESCRIPTION -catfile, canonpath +devnull + +=back + +tmpdir + +catfile + +canonpath + +splitpath + +splitdir + +catpath + +abs2rel + +rel2abs + +=over + +=item SEE ALSO + +=back =head2 File::stat - by-name interface to Perl's built-in stat() functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -4649,16 +7367,24 @@ catfile, canonpath =item AUTHOR +=back + 
=head2 FileCache - keep more files open than the system permits +=over + =item SYNOPSIS =item DESCRIPTION =item BUGS +=back + =head2 FileHandle - supply object methods for filehandles +=over + =item SYNOPSIS =item DESCRIPTION @@ -4667,8 +7393,12 @@ $fh->print, $fh->printf, $fh->getline, $fh->getlines =item SEE ALSO +=back + =head2 FindBin - Locate directory of original perl script +=over + =item SYNOPSIS =item DESCRIPTION @@ -4681,10 +7411,12 @@ $fh->print, $fh->printf, $fh->getline, $fh->getlines =item COPYRIGHT -=item REVISION +=back =head2 GDBM_File - Perl5 access to the gdbm library. +=over + =item SYNOPSIS =item DESCRIPTION @@ -4695,77 +7427,385 @@ $fh->print, $fh->printf, $fh->getline, $fh->getlines =item SEE ALSO -=head2 Getopt::Long, GetOptions - extended processing of command line -options +=back + +=head2 Getopt::Long - Extended processing of command line options + +=over =item SYNOPSIS =item DESCRIPTION -!, +, :s, :i, :f +=item Command Line Options, an Introduction + +=item Getting Started with Getopt::Long =over -=item Linkage specification +=item Simple options -=item Aliases and abbreviations +=item A little bit less simple options -=item Non-option call-back routine +=item Mixing command line option with other arguments -=item Option starters +=item Options with values -=item Return values and Errors +=item Options with multiple values + +=item Options with hash values + +=item User-defined subroutines to handle options + +=item Options with multiple names + +=item Case and abbreviations + +=item Summary of Option Specifications + +!, +, s, i, f, : I<type> [ I<desttype> ] =back -=item COMPATIBILITY +=item Advanced Possibilities -=item EXAMPLES +=over + +=item Documentation and help texts + +=item Storing options in a hash + +=item Bundling + +=item The lonesome dash + +=item Argument call-back + +=back -=item CONFIGURATION OPTIONS +=item Configuring Getopt::Long default, auto_abbrev, getopt_compat, require_order, permute, bundling -(default: reset), 
bundling_override (default: reset), ignore_case +(default: reset), bundling_override (default: reset), ignore_case (default: set), ignore_case_always (default: reset), pass_through (default: reset), prefix, prefix_pattern, debug (default: reset) -=item OTHER USEFUL VARIABLES +=item Return values and Errors + +=item Legacy + +=over + +=item Default destinations + +=item Alternative option starters -$Getopt::Long::VERSION, $Getopt::Long::error +=item Configuration variables + +=back =item AUTHOR =item COPYRIGHT AND DISCLAIMER +=back + =head2 Getopt::Std, getopt - Process single-character switches with switch clustering +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 I18N::Collate - compare 8-bit scalar data according to the current locale +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 IO - load various IO modules +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=head2 IO::Dir - supply object methods for directory handles + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +new ( [ DIRNAME ] ), open ( DIRNAME ), read (), seek ( POS ), tell (), +rewind (), close (), tie %hash, IO::Dir, DIRNAME [, OPTIONS ] + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::File - supply object methods for filehandles + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( FILENAME [,MODE [,PERMS]] ), new_tmpfile + +=item METHODS + +open( FILENAME [,MODE [,PERMS]] ) + +=item SEE ALSO + +=item HISTORY + +=back + +=head2 IO::Handle - supply object methods for I/O handles + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new (), new_from_fd ( FD, MODE ) + +=item METHODS + +$io->fdopen ( FD, MODE ), $io->opened, $io->getline, $io->getlines, +$io->ungetc ( ORD ), $io->write ( BUF, LEN [, OFFSET ] ), $io->error, +$io->clearerr, $io->sync, $io->flush, $io->printflush ( ARGS ), +$io->blocking ( [ BOOL ] ), $io->untaint + +=item NOTE + +=item SEE ALSO + +=item BUGS + +=item HISTORY + +=back + +=head2 
IO::Pipe - supply object methods for pipes + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [READER, WRITER] ) + +=item METHODS + +reader ([ARGS]), writer ([ARGS]), handles () + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::Poll - Object interface to system poll call + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item METHODS + +mask ( IO [, EVENT_MASK ] ), poll ( [ TIMEOUT ] ), events ( IO ), remove ( +IO ), handles( [ EVENT_MASK ] ) + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::Seekable - supply seek based methods for I/O objects + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item SEE ALSO + +=item HISTORY + +=back + +=head2 IO::Select - OO interface to the select system call + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [ HANDLES ] ) + +=item METHODS + +add ( HANDLES ), remove ( HANDLES ), exists ( HANDLE ), handles, can_read ( +[ TIMEOUT ] ), can_write ( [ TIMEOUT ] ), has_exception ( [ TIMEOUT ] ), +count (), bits(), select ( READ, WRITE, ERROR [, TIMEOUT ] ) + +=item EXAMPLE + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::Socket - Object interface to socket communications + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [ARGS] ) + +=item METHODS + +accept([PKG]), socketpair(DOMAIN, TYPE, PROTOCOL), timeout([VAL]), +sockopt(OPT [, VAL]), sockdomain, socktype, protocol, connected + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::Socket::INET - Object interface for AF_INET domain sockets + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [ARGS] ) + +=over + +=item METHODS + +sockaddr (), sockport (), sockhost (), peeraddr (), peerport (), peerhost +() + +=back + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::Socket::UNIX - Object interface for AF_UNIX domain sockets + +=over + +=item SYNOPSIS + +=item DESCRIPTION + 
+=item CONSTRUCTOR + +new ( [ARGS] ) + +=item METHODS + +hostpath(), peerpath() + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::lib::IO::Dir, IO::Dir - supply object methods for directory +handles + +=over + =item SYNOPSIS =item DESCRIPTION +new ( [ DIRNAME ] ), open ( DIRNAME ), read (), seek ( POS ), tell (), +rewind (), close (), tie %hash, IO::Dir, DIRNAME [, OPTIONS ] + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + =head2 IO::lib::IO::File, IO::File - supply object methods for filehandles +=over + =item SYNOPSIS =item DESCRIPTION =item CONSTRUCTOR -new ([ ARGS ] ), new_tmpfile +new ( FILENAME [,MODE [,PERMS]] ), new_tmpfile =item METHODS @@ -4775,9 +7815,13 @@ open( FILENAME [,MODE [,PERMS]] ) =item HISTORY +=back + =head2 IO::lib::IO::Handle, IO::Handle - supply object methods for I/O handles +=over + =item SYNOPSIS =item DESCRIPTION @@ -4788,9 +7832,10 @@ new (), new_from_fd ( FD, MODE ) =item METHODS -$fh->fdopen ( FD, MODE ), $fh->opened, $fh->getline, $fh->getlines, -$fh->ungetc ( ORD ), $fh->write ( BUF, LEN [, OFFSET }\] ), $fh->flush, -$fh->error, $fh->clearerr, $fh->untaint +$io->fdopen ( FD, MODE ), $io->opened, $io->getline, $io->getlines, +$io->ungetc ( ORD ), $io->write ( BUF, LEN [, OFFSET ] ), $io->error, +$io->clearerr, $io->sync, $io->flush, $io->printflush ( ARGS ), +$io->blocking ( [ BOOL ] ), $io->untaint =item NOTE @@ -4800,13 +7845,17 @@ $fh->error, $fh->clearerr, $fh->untaint =item HISTORY -=head2 IO::lib::IO::Pipe, IO::pipe - supply object methods for pipes +=back + +=head2 IO::lib::IO::Pipe, IO::Pipe - supply object methods for pipes + +=over =item SYNOPSIS =item DESCRIPTION -=item CONSTRCUTOR +=item CONSTRUCTOR new ( [READER, WRITER] ) @@ -4820,9 +7869,34 @@ reader ([ARGS]), writer ([ARGS]), handles () =item COPYRIGHT +=back + +=head2 IO::lib::IO::Poll, IO::Poll - Object interface to system poll call + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item METHODS + +mask ( IO [, EVENT_MASK 
] ), poll ( [ TIMEOUT ] ), events ( IO ), remove ( +IO ), handles( [ EVENT_MASK ] ) + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + =head2 IO::lib::IO::Seekable, IO::Seekable - supply seek based methods for I/O objects +=over + =item SYNOPSIS =item DESCRIPTION @@ -4831,9 +7905,13 @@ I/O objects =item HISTORY +=back + =head2 IO::lib::IO::Select, IO::Select - OO interface to the select system call +=over + =item SYNOPSIS =item DESCRIPTION @@ -4845,8 +7923,8 @@ new ( [ HANDLES ] ) =item METHODS add ( HANDLES ), remove ( HANDLES ), exists ( HANDLE ), handles, can_read ( -[ TIMEOUT ] ), can_write ( [ TIMEOUT ] ), has_error ( [ TIMEOUT ] ), count -(), bits(), bits(), select ( READ, WRITE, ERROR [, TIMEOUT ] ) +[ TIMEOUT ] ), can_write ( [ TIMEOUT ] ), has_exception ( [ TIMEOUT ] ), +count (), bits(), select ( READ, WRITE, ERROR [, TIMEOUT ] ) =item EXAMPLE @@ -4854,9 +7932,13 @@ add ( HANDLES ), remove ( HANDLES ), exists ( HANDLE ), handles, can_read ( =item COPYRIGHT +=back + =head2 IO::lib::IO::Socket, IO::Socket - Object interface to socket communications +=over + =item SYNOPSIS =item DESCRIPTION @@ -4867,36 +7949,98 @@ new ( [ARGS] ) =item METHODS -accept([PKG]), timeout([VAL]), sockopt(OPT [, VAL]), sockdomain, socktype, -protocol +accept([PKG]), socketpair(DOMAIN, TYPE, PROTOCOL), timeout([VAL]), +sockopt(OPT [, VAL]), sockdomain, socktype, protocol, connected -=item SUB-CLASSES +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::lib::IO::Socket::INET, IO::Socket::INET - Object interface for +AF_INET domain sockets =over -=item IO::Socket::INET +=item SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [ARGS] ) + +=over =item METHODS sockaddr (), sockport (), sockhost (), peeraddr (), peerport (), peerhost () -=item IO::Socket::UNIX +=back + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 IO::lib::IO::Socket::UNIX, IO::Socket::UNIX - Object interface for +AF_UNIX domain sockets + +=over + +=item 
SYNOPSIS + +=item DESCRIPTION + +=item CONSTRUCTOR + +new ( [ARGS] ) =item METHODS hostpath(), peerpath() +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + =back +=head2 IPC::Msg - SysV Msg IPC object class + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item METHODS + +new ( KEY , FLAGS ), id, rcv ( BUF, LEN [, TYPE [, FLAGS ]] ), remove, set +( STAT ), set ( NAME => VALUE [, NAME => VALUE ...] ), snd ( TYPE, MSG [, +FLAGS ] ), stat + =item SEE ALSO =item AUTHOR =item COPYRIGHT +=back + =head2 IPC::Open2, open2 - open a process for both reading and writing +=over + =item SYNOPSIS =item DESCRIPTION @@ -4905,17 +8049,48 @@ hostpath(), peerpath() =item SEE ALSO +=back + =head2 IPC::Open3, open3 - open a process for reading, writing, and error handling +=over + =item SYNOPSIS =item DESCRIPTION =item WARNING +=back + +=head2 IPC::Semaphore - SysV Semaphore IPC object class + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item METHODS + +new ( KEY , NSEMS , FLAGS ), getall, getncnt ( SEM ), getpid ( SEM ), +getval ( SEM ), getzcnt ( SEM ), id, op ( OPLIST ), remove, set ( STAT ), +set ( NAME => VALUE [, NAME => VALUE ...] ), setall ( VALUES ), setval ( N +, VALUE ), stat + +=item SEE ALSO + +=item AUTHOR + +=item COPYRIGHT + +=back + =head2 IPC::SysV - SysV IPC constants +=over + =item SYNOPSIS =item DESCRIPTION @@ -4928,8 +8103,12 @@ ftok( PATH, ID ) =item COPYRIGHT +=back + =head2 IPC::SysV::Msg, IPC::Msg - SysV Msg IPC object class +=over + =item SYNOPSIS =item DESCRIPTION @@ -4946,9 +8125,13 @@ FLAGS ] ), stat =item COPYRIGHT +=back + =head2 IPC::SysV::Semaphore, IPC::Semaphore - SysV Semaphore IPC object class +=over + =item SYNOPSIS =item DESCRIPTION @@ -4966,20 +8149,29 @@ set ( NAME => VALUE [, NAME => VALUE ...] 
), setall ( VALUES ), setval ( N =item COPYRIGHT +=back + =head2 Math::BigFloat - Arbitrary length float math package +=over + =item SYNOPSIS =item DESCRIPTION -number format, Error returns 'NaN', Division is computed to +number format, Error returns 'NaN', Division is computed to, Rounding is +performed =item BUGS =item AUTHOR +=back + =head2 Math::BigInt - Arbitrary size integer math package +=over + =item SYNOPSIS =item DESCRIPTION @@ -4994,31 +8186,12 @@ Canonical notation, Input, Output =item AUTHOR -=head2 Math::Complex - complex numbers and associated mathematical -functions - -=item SYNOPSIS - -=item DESCRIPTION - -=item OPERATIONS - -=item CREATION - -=item STRINGIFICATION - -=item USAGE - -=item ERRORS DUE TO DIVISION BY ZERO OR LOGARITHM OF ZERO - -=item ERRORS DUE TO INDIGESTIBLE ARGUMENTS - -=item BUGS - -=item AUTHORS +=back =head2 Math::Trig - trigonometric functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -5058,14 +8231,22 @@ cylindrical_to_spherical, spherical_to_cartesian, spherical_to_cylindrical =item AUTHORS +=back + =head2 NDBM_File - Tied access to ndbm files +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 Net::Ping - check a remote host for reachability +=over + =item SYNOPSIS =item DESCRIPTION @@ -5083,9 +8264,13 @@ $timeout]);, $p->close();, pingecho($host [, $timeout]); =item NOTES +=back + =head2 Net::hostent - by-name interface to Perl's built-in gethost*() functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -5096,9 +8281,13 @@ functions =item AUTHOR +=back + =head2 Net::netent - by-name interface to Perl's built-in getnet*() functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -5109,9 +8298,13 @@ functions =item AUTHOR +=back + =head2 Net::protoent - by-name interface to Perl's built-in getproto*() functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -5120,9 +8313,13 @@ functions =item AUTHOR +=back + =head2 Net::servent - by-name interface to Perl's built-in getserv*() functions +=over + =item SYNOPSIS =item 
DESCRIPTION @@ -5133,14 +8330,38 @@ functions =item AUTHOR +=back + +=head2 O - Generic interface to Perl Compiler backends + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item CONVENTIONS + +=item IMPLEMENTATION + +=item AUTHOR + +=back + =head2 ODBM_File - Tied access to odbm files +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 Opcode - Disable named opcodes when compiling perl code +=over + =item SYNOPSIS =item DESCRIPTION @@ -5165,6 +8386,10 @@ opdump (PAT) =item TO DO (maybe) +=back + +=over + =item Predefined Opcode Tags :base_core, :base_mem, :base_loop, :base_io, :base_orig, :base_math, @@ -5176,9 +8401,13 @@ opdump (PAT) =item AUTHORS +=back + =head2 Opcode::Safe, Safe - Compile and execute code in restricted compartments +=over + =item SYNOPSIS =item DESCRIPTION @@ -5206,17 +8435,25 @@ Memory, CPU, Snooping, Signals, State Changes =back +=back + =head2 Opcode::ops, ops - Perl pragma to restrict unsafe operations when compiling -=item SYNOPSIS +=over + +=item SYNOPSIS =item DESCRIPTION =item SEE ALSO +=back + =head2 POSIX - Perl interface to IEEE Std 1003.1 +=over + =item SYNOPSIS =item DESCRIPTION @@ -5342,57 +8579,1002 @@ Constants, Macros =item CREATION +=back + +=head2 Pod::Checker, podchecker() - check pod documents for syntax errors + +=over + +=item SYNOPSIS + +=item OPTIONS/ARGUMENTS + +=over + +=item podchecker() + +B<-warnings> =E<gt> I<val> + +=back + +=item DESCRIPTION + +=item DIAGNOSTICS + +=over + +=item Errors + +empty =headn, =over on line I<N> without closing =back, =item without +previous =over, =back without previous =over, No argument for =begin, =end +without =begin, Nested =begin's, =for without formatter specification, +unresolved internal link I<NAME>, Unknown command "I<CMD>", Unknown +interior-sequence "I<SEQ>", nested commands +I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>, garbled entity I<STRING>, Entity +number out of range, malformed link LE<lt>E<gt>, nonempty ZE<lt>E<gt>, +empty XE<lt>E<gt>, Spurious text after 
=pod / =cut, Spurious character(s) +after =back + +=item Warnings + +multiple occurence of link target I<name>, line containing nothing but +whitespace in paragraph, file does not start with =head, No numeric +argument for =over, previous =item has no contents, preceding non-item +paragraph(s), =item type mismatch (I<one> vs. I<two>), I<N> unescaped +C<E<lt>E<gt>> in paragraph, Unknown entity, No items in =over, No argument +for =item, empty section in previous paragraph, Verbatim paragraph in NAME +section, Hyperlinks + +=back + +=item RETURN VALUE + +=item EXAMPLES + +=item INTERFACE + +=back + +C<$checker-E<gt>poderror( @args )>, C<$checker-E<gt>poderror( {%opts}, +@args )> + +C<$checker-E<gt>num_errors()> + +C<$checker-E<gt>name()> + +C<$checker-E<gt>node()> + +C<$checker-E<gt>idx()> + +C<$checker-E<gt>hyperlink()> + +=over + +=item AUTHOR + +=back + +=head2 Pod::Find - find POD documents in directory trees + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item OPTIONS + +B<-verbose>, B<-perl>, B<-script>, B<-inc> + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 Pod::Html - module to convert pod files to HTML +=over + =item SYNOPSIS =item DESCRIPTION =item ARGUMENTS -help, htmlroot, infile, outfile, podroot, podpath, libpods, netscape, -nonetscape, index, noindex, recurse, norecurse, title, verbose +backlink, css, flush, header, help, htmldir, htmlroot, index, infile, +libpods, netscape, outfile, podpath, podroot, quiet, recurse, title, +verbose =item EXAMPLE +=item ENVIRONMENT + +=item AUTHOR + +=item SEE ALSO + +=item COPYRIGHT + +=back + +=head2 Pod::InputObjects - objects representing POD input paragraphs, +commands, etc. 
+ +=over + +=item SYNOPSIS + +=item REQUIRES + +=item EXPORTS + +=item DESCRIPTION + +B<Pod::InputSource>, B<Pod::Paragraph>, B<Pod::InteriorSequence>, +B<Pod::ParseTree> + +=back + +=over + +=item B<Pod::InputSource> + +=back + +=over + +=item B<new()> + +=back + +=over + +=item B<name()> + +=back + +=over + +=item B<handle()> + +=back + +=over + +=item B<was_cutting()> + +=back + +=over + +=item B<Pod::Paragraph> + +=back + +=over + +=item B<new()> + +=back + +=over + +=item B<cmd_name()> + +=back + +=over + +=item B<text()> + +=back + +=over + +=item B<raw_text()> + +=back + +=over + +=item B<cmd_prefix()> + +=back + +=over + +=item B<cmd_separator()> + +=back + +=over + +=item B<parse_tree()> + +=back + +=over + +=item B<file_line()> + +=back + +=over + +=item B<Pod::InteriorSequence> + +=back + +=over + +=item B<new()> + +=back + +=over + +=item B<cmd_name()> + +=back + +=over + +=item B<prepend()> + +=back + +=over + +=item B<append()> + +=back + +=over + +=item B<nested()> + +=back + +=over + +=item B<raw_text()> + +=back + +=over + +=item B<left_delimiter()> + +=back + +=over + +=item B<right_delimiter()> + +=back + +=over + +=item B<parse_tree()> + +=back + +=over + +=item B<file_line()> + +=back + +=over + +=item B<DESTROY()> + +=back + +=over + +=item B<Pod::ParseTree> + +=back + +=over + +=item B<new()> + +=back + +=over + +=item B<top()> + +=back + +=over + +=item B<children()> + +=back + +=over + +=item B<prepend()> + +=back + +=over + +=item B<append()> + +=back + +=over + +=item B<raw_text()> + +=back + +=over + +=item B<DESTROY()> + +=back + +=over + +=item SEE ALSO + =item AUTHOR +=back + +=head2 Pod::Man - Convert POD data to formatted *roff input + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +center, date, fixed, fixedbold, fixeditalic, fixedbolditalic, release, +section + +=item DIAGNOSTICS + +roff font should be 1 or 2 chars, not `%s', Invalid link %s, Unknown escape +EE<lt>%sE<gt>, Unknown sequence %s, Unmatched =back + =item BUGS =item 
SEE ALSO -=item COPYRIGHT +=item AUTHOR + +=back + +=head2 Pod::ParseUtils - helpers for POD parsing and conversion + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +=over + +=item Pod::List + +new() + +=back + +file() + +start() + +indent() + +type() + +rx() + +item() + +parent() + +tag() + +=over + +=item Pod::Hyperlink + +new() + +=back + +parse($string) + +markup($string) + +text() + +warning() + +line(), file() + +page() + +node() + +alttext() + +type() + +link() + +=over + +=item Pod::Cache + +new() + +=back + +item() + +find_page($name) + +=over + +=item Pod::Cache::Item + +new() + +=back + +page() + +description() + +path() + +file() + +nodes() + +find_node($name) + +idx() + +=over + +=item AUTHOR + +=item SEE ALSO + +=back + +=head2 Pod::Parser - base class for creating POD filters and translators + +=over + +=item SYNOPSIS + +=item REQUIRES + +=item EXPORTS + +=item DESCRIPTION + +=item QUICK OVERVIEW + +=item PARSING OPTIONS + +B<-want_nonPODs> (default: unset), B<-process_cut_cmd> (default: unset), +B<-warnings> (default: unset) + +=back + +=over + +=item RECOMMENDED SUBROUTINE/METHOD OVERRIDES + +=back + +=over + +=item B<command()> + +C<$cmd>, C<$text>, C<$line_num>, C<$pod_para> + +=back + +=over + +=item B<verbatim()> + +C<$text>, C<$line_num>, C<$pod_para> + +=back + +=over + +=item B<textblock()> + +C<$text>, C<$line_num>, C<$pod_para> + +=back + +=over + +=item B<interior_sequence()> + +=back + +=over + +=item OPTIONAL SUBROUTINE/METHOD OVERRIDES + +=back + +=over + +=item B<new()> + +=back + +=over + +=item B<initialize()> + +=back + +=over + +=item B<begin_pod()> + +=back + +=over + +=item B<begin_input()> + +=back + +=over + +=item B<end_input()> + +=back + +=over + +=item B<end_pod()> + +=back + +=over + +=item B<preprocess_line()> + +=back + +=over + +=item B<preprocess_paragraph()> + +=back + +=over + +=item METHODS FOR PARSING AND PROCESSING + +=back + +=over + +=item B<parse_text()> + +B<-expand_seq> =E<gt> 
I<code-ref>|I<method-name>, B<-expand_text> =E<gt> +I<code-ref>|I<method-name>, B<-expand_ptree> =E<gt> +I<code-ref>|I<method-name> + +=back + +=over + +=item B<interpolate()> + +=back + +=over + +=item B<parse_paragraph()> + +=back + +=over + +=item B<parse_from_filehandle()> + +=back + +=over + +=item B<parse_from_file()> + +=back + +=over + +=item ACCESSOR METHODS + +=back + +=over + +=item B<errorsub()> + +=back + +=over + +=item B<cutting()> + +=back + +=over + +=item B<parseopts()> + +=back + +=over + +=item B<output_file()> + +=back + +=over + +=item B<output_handle()> + +=back + +=over + +=item B<input_file()> + +=back + +=over + +=item B<input_handle()> + +=back + +=over + +=item B<input_streams()> + +=back + +=over + +=item B<top_stream()> + +=back + +=over + +=item PRIVATE METHODS AND DATA + +=back + +=over + +=item B<_push_input_stream()> + +=back + +=over + +=item B<_pop_input_stream()> + +=back + +=over + +=item TREE-BASED PARSING + +=item SEE ALSO + +=item AUTHOR + +=back + +=head2 Pod::Plainer - Perl extension for converting Pod to old style Pod. 
+ +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=over + +=item EXPORT -=head2 Pod::Text - convert POD data to formatted ASCII text +=back + +=item AUTHOR + +=item SEE ALSO + +=back + +=head2 Pod::Select, podselect() - extract selected sections of POD from +input + +=over =item SYNOPSIS +=item REQUIRES + +=item EXPORTS + =item DESCRIPTION +=item SECTION SPECIFICATIONS + +=item RANGE SPECIFICATIONS + +=back + +=over + +=item OBJECT METHODS + +=back + +=over + +=item B<curr_headings()> + +=back + +=over + +=item B<select()> + +=back + +=over + +=item B<add_selection()> + +=back + +=over + +=item B<clear_selections()> + +=back + +=over + +=item B<match_section()> + +=back + +=over + +=item B<is_selected()> + +=back + +=over + +=item EXPORTED FUNCTIONS + +=back + +=over + +=item B<podselect()> + +B<-output>, B<-sections>, B<-ranges> + +=back + +=over + +=item PRIVATE METHODS AND DATA + +=back + +=over + +=item B<_compile_section_spec()> + +=back + +=over + +=item $self->{_SECTION_HEADINGS} + +=back + +=over + +=item $self->{_SELECTED_SECTIONS} + +=back + +=over + +=item SEE ALSO + =item AUTHOR -=item TODO +=back + +=head2 Pod::Text - Convert POD data to formatted ASCII text + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +alt, indent, loose, sentence, width + +=item DIAGNOSTICS + +Bizarre space in item, Can't open %s for reading: %s, Unknown escape: %s, +Unknown sequence: %s, Unmatched =back + +=item RESTRICTIONS + +=item NOTES + +=item SEE ALSO + +=item AUTHOR + +=back + +=head2 Pod::Text::Color - Convert POD data to formatted color ASCII text + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item BUGS + +=item SEE ALSO + +=item AUTHOR + +=back + +=head2 Pod::Text::Termcap, Pod::Text::Color - Convert POD data to ASCII +text with format escapes + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item SEE ALSO + +=item AUTHOR + +=back + +=head2 Pod::Usage, pod2usage() - print a usage message from embedded pod +documentation + +=over + +=item SYNOPSIS + +=item 
ARGUMENTS + +C<-message>, C<-msg>, C<-exitval>, C<-verbose>, C<-output>, C<-input>, +C<-pathlist> + +=item DESCRIPTION + +=item EXAMPLES + +=over + +=item Recommended Use + +=back + +=item CAVEATS + +=item AUTHOR + +=item ACKNOWLEDGEMENTS + +=back =head2 SDBM_File - Tied access to sdbm files +=over + =item SYNOPSIS =item DESCRIPTION +=back + +=head2 Safe - Compile and execute code in restricted compartments + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +a new namespace, an operator mask + +=item WARNING + +=over + +=item RECENT CHANGES + +=item Methods in class Safe + +permit (OP, ...), permit_only (OP, ...), deny (OP, ...), deny_only (OP, +...), trap (OP, ...), untrap (OP, ...), share (NAME, ...), share_from +(PACKAGE, ARRAYREF), varglob (VARNAME), reval (STRING), rdo (FILENAME), +root (NAMESPACE), mask (MASK) + +=item Some Safety Issues + +Memory, CPU, Snooping, Signals, State Changes + +=item AUTHOR + +=back + +=back + =head2 Search::Dict, look - search for key in dictionary file +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 SelectSaver - save and restore selected file handle +=over + =item SYNOPSIS =item DESCRIPTION +=back + =head2 SelfLoader - load functions only on demand +=over + =item SYNOPSIS =item DESCRIPTION @@ -5415,17 +9597,25 @@ nonetscape, index, noindex, recurse, norecurse, title, verbose =item Multiple packages and fully qualified subroutine names +=back + =head2 Shell - run shell commands transparently within perl +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 Socket, sockaddr_in, sockaddr_un, inet_aton, inet_ntoa - load the C socket.h defines and structure manipulators +=over + =item SYNOPSIS =item DESCRIPTION @@ -5436,23 +9626,35 @@ SOCKADDR_IN, pack_sockaddr_in PORT, IP_ADDRESS, unpack_sockaddr_in SOCKADDR_IN, sockaddr_un PATHNAME, sockaddr_un SOCKADDR_UN, pack_sockaddr_un PATH, unpack_sockaddr_un SOCKADDR_UN +=back + =head2 Symbol - manipulate Perl symbols and their names +=over + =item SYNOPSIS =item 
DESCRIPTION +=back + =head2 Sys::Hostname - Try every conceivable way to get hostname +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 Syslog, Sys::Syslog, openlog, closelog, setlogmask, syslog - Perl interface to the UNIX syslog(3) calls +=over + =item SYNOPSIS =item DESCRIPTION @@ -5463,22 +9665,69 @@ closelog =item EXAMPLES -=item DEPENDENCIES +=item SEE ALSO + +=item AUTHOR + +=back + +=head2 Syslog::Syslog, Sys::Syslog, openlog, closelog, setlogmask, syslog - +Perl interface to the UNIX syslog(3) calls + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +openlog $ident, $logopt, $facility, syslog $priority, $format, @args, +setlogmask $mask_priority, setlogsock $sock_type (added in 5.004_02), +closelog + +=item EXAMPLES =item SEE ALSO =item AUTHOR +=back + +=head2 Term::ANSIColor - Color screen output using ANSI escape sequences + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item DIAGNOSTICS + +Invalid attribute name %s, Identifier %s used only once: possible typo, No +comma allowed after filehandle, Bareword %s not allowed while "strict subs" +in use + +=item RESTRICTIONS + +=item AUTHORS + +=back + =head2 Term::Cap - Perl termcap interface +=over + =item SYNOPSIS =item DESCRIPTION =item EXAMPLES +=back + =head2 Term::Complete - Perl word completion module +=over + =item SYNOPSIS =item DESCRIPTION @@ -5491,9 +9740,13 @@ E<lt>tabE<gt>, ^D, ^U, E<lt>delE<gt>, E<lt>bsE<gt> =item AUTHOR +=back + =head2 Term::ReadLine - Perl interface to various C<readline> packages. If no real package is found, substitutes stubs instead of basic functions. 
+=over + =item SYNOPSIS =item DESCRIPTION @@ -5511,7 +9764,11 @@ C<tkRunning>, C<ornaments>, C<newTTY> =item ENVIRONMENT -=head2 Test - provides a simple framework for writing test scripts +=back + +=head2 Test - provides a simple framework for writing test scripts + +=over =item SYNOPSIS @@ -5521,14 +9778,20 @@ C<tkRunning>, C<ornaments>, C<newTTY> NORMAL TESTS, SKIPPED TESTS, TODO TESTS +=item RETURN VALUE + =item ONFAIL =item SEE ALSO =item AUTHOR +=back + =head2 Test::Harness - run perl standard test scripts with statistics +=over + =item SYNOPSIS =item DESCRIPTION @@ -5556,17 +9819,25 @@ C<All tests successful.\nFiles=%d, Tests=%d, %s>, C<FAILED tests =item BUGS +=back + =head2 Text::Abbrev, abbrev - create an abbreviation table from a list +=over + =item SYNOPSIS =item DESCRIPTION =item EXAMPLE +=back + =head2 Text::ParseWords - parse text into an array of tokens or array of arrays +=over + =item SYNOPSIS =item DESCRIPTION @@ -5581,9 +9852,13 @@ backslashed double-quote) =item AUTHORS +=back + =head2 Text::Soundex - Implementation of the Soundex Algorithm as Described by Knuth +=over + =item SYNOPSIS =item DESCRIPTION @@ -5594,9 +9869,13 @@ by Knuth =item AUTHOR +=back + =head2 Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1) +=over + =item SYNOPSIS =item DESCRIPTION @@ -5605,19 +9884,26 @@ unexpand(1) =item AUTHOR +=back + =head2 Text::Wrap - line wrapping to form simple paragraphs +=over + =item SYNOPSIS =item DESCRIPTION =item EXAMPLE -=item BUGS - =item AUTHOR -=head2 Thread - multithreading +=back + +=head2 Thread - manipulate threads in Perl (EXPERIMENTAL, subject to +change) + +=over =item SYNOPSIS @@ -5627,18 +9913,22 @@ unexpand(1) new \&start_sub, new \&start_sub, LIST, lock VARIABLE, async BLOCK;, Thread->self, Thread->list, cond_wait VARIABLE, cond_signal VARIABLE, -cond_broadcast VARIABLE +cond_broadcast VARIABLE, yield =item METHODS -join, eval, tid +join, eval, detach, equal, tid =item LIMITATIONS =item SEE ALSO 
+=back + =head2 Thread::Queue - thread-safe queues +=over + =item SYNOPSIS =item DESCRIPTION @@ -5649,8 +9939,12 @@ new, enqueue LIST, dequeue, dequeue_nb, pending =item SEE ALSO +=back + =head2 Thread::Semaphore - thread-safe semaphores +=over + =item SYNOPSIS =item DESCRIPTION @@ -5659,34 +9953,53 @@ new, enqueue LIST, dequeue, dequeue_nb, pending new, new NUMBER, down, down NUMBER, up, up NUMBER +=back + =head2 Thread::Signal - Start a thread which runs signal handlers reliably +=over + =item SYNOPSIS =item DESCRIPTION =item BUGS +=back + =head2 Thread::Specific - thread-specific keys +=over + =item SYNOPSIS +=item DESCRIPTION + +=back + =head2 Tie::Array - base class for tied arrays -=item SYNOPSIS +=over + +=item SYNOPSIS -=item DESCRIPTION +=item DESCRIPTION TIEARRAY classname, LIST, STORE this, index, value, FETCH this, index, -FETCHSIZE this, STORESIZE this, count, EXTEND this, count, CLEAR this, -DESTROY this, PUSH this, LIST, POP this, SHIFT this, UNSHIFT this, LIST, -SPLICE this, offset, length, LIST +FETCHSIZE this, STORESIZE this, count, EXTEND this, count, EXISTS this, +key, DELETE this, key, CLEAR this, DESTROY this, PUSH this, LIST, POP this, +SHIFT this, UNSHIFT this, LIST, SPLICE this, offset, length, LIST =item CAVEATS =item AUTHOR -=head2 Tie::Handle - base class definitions for tied handles +=back + +=head2 Tie::Handle, Tie::StdHandle - base class definitions for tied +handles + +=over =item SYNOPSIS @@ -5694,12 +10007,17 @@ SPLICE this, offset, length, LIST TIEHANDLE classname, LIST, WRITE this, scalar, length, offset, PRINT this, LIST, PRINTF this, format, LIST, READ this, scalar, length, offset, -READLINE this, GETC this, DESTROY this +READLINE this, GETC this, CLOSE this, OPEN this, filename, BINMODE this, +EOF this, TELL this, SEEK this, offset, whence, DESTROY this =item MORE INFORMATION +=back + =head2 Tie::Hash, Tie::StdHash - base class definitions for tied hashes +=over + =item SYNOPSIS =item DESCRIPTION @@ -5711,8 +10029,12 @@ this, 
NEXTKEY this, lastkey, EXISTS this, key, DELETE this, key, CLEAR this =item MORE INFORMATION +=back + =head2 Tie::RefHash - use references as hash keys +=over + =item SYNOPSIS =item DESCRIPTION @@ -5725,9 +10047,13 @@ this, NEXTKEY this, lastkey, EXISTS this, key, DELETE this, key, CLEAR this =item SEE ALSO +=back + =head2 Tie::Scalar, Tie::StdScalar - base class definitions for tied scalars +=over + =item SYNOPSIS =item DESCRIPTION @@ -5736,23 +10062,39 @@ TIESCALAR classname, LIST, FETCH this, STORE this, value, DESTROY this =item MORE INFORMATION +=back + =head2 Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing +=over + =item SYNOPSIS =item DESCRIPTION =item CAVEATS +=back + =head2 Time::Local - efficiently compute time from local and GMT time +=over + =item SYNOPSIS =item DESCRIPTION +=item IMPLEMENTATION + +=item BUGS + +=back + =head2 Time::gmtime - by-name interface to Perl's built-in gmtime() function +=over + =item SYNOPSIS =item DESCRIPTION @@ -5761,9 +10103,13 @@ function =item AUTHOR +=back + =head2 Time::localtime - by-name interface to Perl's built-in localtime() function +=over + =item SYNOPSIS =item DESCRIPTION @@ -5772,16 +10118,24 @@ function =item AUTHOR +=back + =head2 Time::tm - internal object used by Time::gmtime and Time::localtime +=over + =item SYNOPSIS =item DESCRIPTION =item AUTHOR +=back + =head2 UNIVERSAL - base class for ALL classes (blessed references) +=over + =item SYNOPSIS =item DESCRIPTION @@ -5789,9 +10143,13 @@ function isa ( TYPE ), can ( METHOD ), VERSION ( [ REQUIRE ] ), UNIVERSAL::isa ( VAL, TYPE ), UNIVERSAL::can ( VAL, METHOD ) +=back + =head2 User::grent - by-name interface to Perl's built-in getgr*() functions +=over + =item SYNOPSIS =item DESCRIPTION @@ -5800,22 +10158,52 @@ functions =item AUTHOR +=back + =head2 User::pwent - by-name interface to Perl's built-in getpw*() functions +=over + =item SYNOPSIS =item DESCRIPTION +=over + +=item System Specifics + +=back + =item NOTE =item AUTHOR +=item HISTORY + 
+March 18th, 2000 + +=back + +=head2 XSLoader - Dynamically load C libraries into Perl code + +=over + +=item SYNOPSIS + +=item DESCRIPTION + +=item AUTHOR + +=back + =head1 AUXILIARY DOCUMENTATION Here should be listed all the extra programs' documentation, but they don't all have manual pages yet: +=over + =item a2p =item s2p @@ -5834,6 +10222,8 @@ don't all have manual pages yet: =item wrapsuid +=back + =head1 AUTHOR Larry Wall <F<larry@wall.org>>, with the help of oodles diff --git a/contrib/perl5/pod/perltodo.pod b/contrib/perl5/pod/perltodo.pod new file mode 100644 index 0000000..f22d473 --- /dev/null +++ b/contrib/perl5/pod/perltodo.pod @@ -0,0 +1,870 @@ +=head1 NAME + +perltodo - Perl TO-DO List + +=head1 DESCRIPTION + +This is a list of wishes for Perl. It is maintained by Nathan +Torkington for the Perl porters. Send updates to +I<perl5-porters@perl.org>. If you want to work on any of these +projects, be sure to check the perl5-porters archives for past ideas, +flames, and propaganda. This will save you time and also prevent you +from implementing something that Larry has already vetoed. One set +of archives may be found at: + + http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/ + + +=head1 Infrastructure + +=head2 Mailing list archives + +Chaim suggests contacting egroup and asking them to archive the other +perl.org mailing lists. Probably not advocacy, but definitely +perl6-porters, etc. + +=head2 Bug tracking system + +Richard Foley I<richard@perl.org> is writing one. We looked at +several, like gnats and the Debian system, but at the time we +investigated them, none met our needs. Since then, Jitterbug has +matured, and may be worth reinvestigation. + +The system we've developed is the recipient of perlbug mail, and any +followups it generates from perl5-porters. New bugs are entered +into a mysql database, and sent on to +perl5-porters with the subject line rewritten to include a "ticket +number" (unique ID for the new bug). 
If the incoming message already +had a ticket number in the subject line, then the message is logged +against that bug. There is a separate email interface (not forwarding +to p5p) that permits porters to claim, categorize, and close tickets. + +There is also a web interface to the system at http://bugs.perl.org. + +The current delay in implementation is caused by perl.org lockups. +One suspect is the mail handling system, possibly going into loops. + +We still desperately need a bugmaster, someone who will look at +every new "bug" and kill those that we already know about, those +that are not bugs at all, etc. + +=head2 Regression Tests + +The test suite for Perl serves two needs: ensuring features work, and +ensuring old bugs have not been reintroduced. Both need work. + +Brent LaVelle (lavelle@metronet.com) has stepped forward to work on +performance tests and improving the size of the test suite. + +=over 4 + +=item Coverage + +Do the tests that come with Perl exercise every line (or every block, +or ...) of the Perl interpreter, and if not then how can we make them +do so? + +=item Regression + +No bug fixes should be made without a corresponding testsuite addition. +This needs a dedicated enforcer, as the current pumpking is either too +lazy or too stupid or both and lets enforcement wander all over the +map. :-) + +=item __DIE__ + +Tests that fail need to be of a form that can be readily mailed +to perlbug and diagnosed with minimal back-and-forth to determine +which test failed, due to what cause, etc. + +=item suidperl + +We need regression/sanity tests for suidperl + +=item The 25% slowdown from perl4 to perl5 + +This value may or may not be accurate, but it certainly is +eye-catching. For some things perl5 is faster than perl4, but often +the reliability and extensibility have come at a cost of speed. The +benchmark suite that Gisle released earlier has been hailed as both a +fantastic solution and as a source of entirely meaningless figures. 
+Do we need to test "real applications"? Can you do so? Anyone have +machines to dedicate to the task? Identify the things that have grown +slower, and see if there's a way to make them faster. + +=back + +=head1 Configure + +Andy Dougherty maintain(ed|s) a list of "todo" items for the configure +that comes with Perl. See Porting/pumpkin.pod in the latest +source release. + +=head2 Install HTML + +Have "make install" give you the option to install HTML as well. This +would be part of Configure. Andy Wardley (certified Perl studmuffin) +will look into the current problems of HTML installation--is +'installhtml' preventing this from happening cleanly, or is pod2html +the problem? If the latter, Brad Appleton's pod work may fix the +problem for free. + +=head1 Perl Language + +=head2 our ($var) + +Declare global variables (lexically or otherwise). + +=head2 64-bit Perl + +Verify complete 64 bit support so that the value of sysseek, or C<-s>, or +stat(), or tell can fit into a perl number without losing precision. +Work with the perl-64bit mailing list on perl.org. + +=head2 Prototypes + +=over 4 + +=item Named prototypes + +Add proper named prototypes that actually work usefully. + +=item Indirect objects + +Fix prototype bug that forgets indirect objects. + +=item Method calls + +Prototypes for method calls. + +=item Context + +Return context prototype declarations. + +=item Scoped subs + +lexically-scoped subs, e.g. my sub + +=back + +=head1 Perl Internals + +=head2 magic_setisa + +C<magic_setisa> should be made to update %FIELDS [???] + +=head2 Garbage Collection + +There was talk of a mark-and-sweep garbage collector at TPC2, but the +(to users) unpredictable nature of its behaviour put some off. +Sarathy, I believe, did the work. Here's what he has to say: + +Yeah, I hope to implement it someday too. 
The points that were +raised in TPC2 were all to do with calling DESTROY() methods, but +I think we can accommodate that by extending bless() to stash +extra information for objects so we track their lifetime accurately +for those that want their DESTROY() to be predictable (this will be +a speed hit, naturally, and will therefore be optional, naturally. :) + +[N.B. Don't even ask me about this now! When I have the time to +write a cogent summary, I'll post it.] + +=head2 Reliable signals + +Sarathy and Dan Sugalski are working on this. Chip posted a patch +earlier, but it was not accepted into 5.005. The issue is tricky, +because it has the potential to greatly slow down the core. + +There are at least three things to consider: + +=over 4 + +=item Alternate runops() for signal despatch + +Sarathy and Dan have discussed this on perl5-porters. + +=item Figure out how to die() in delayed sighandler + +=item Add tests for Thread::Signal + +=item Automatic tests against CPAN + +Is there some way to automatically build all/most of CPAN with +the new Perl and check that the modules there pass all the tests? + +=back + +=head2 Interpolated regex performance bugs + + while (<>) { + $found = 0; + foreach $pat (@patterns) { + $found++ if /$pat/o; + } + print if $found; + } + +The qr// syntax added in 5.005 has solved this problem, but +it needs more thorough documentation. + +=head2 Memory leaks from failed eval/regcomp + +The only known memory leaks in Perl are in failed code or regexp +compilation. Fix this. Hugo Van Der Sanden will attempt this but +won't have tuits until January 1999. + +=head2 Make XS easier to use + +There was interest in SWIG from porters, but nothing has happened +lately. + +=head2 Make embedded Perl easier to use + +This is probably difficult for the same reasons that "XS For Dummies" +will be difficult. 
+ +=head2 Namespace cleanup + + CPP-space: restrict CPP symbols exported from headers + header-space: move into CORE/perl/ + API-space: begin list of things that constitute public api + env-space: Configure should use PERL_CONFIG instead of CONFIG etc. + +=head2 MULTIPLICITY + +Complete work on safe recursive interpreters C<Perl-E<gt>new()>. +Sarathy says that a reference implementation exists. + +=head2 MacPerl + +Chris Nandor and Matthias Neeracher are working on better integrating +MacPerl into the Perl distribution. + +=head1 Documentation + +There's a lot of documentation that comes with Perl. The quantity of +documentation makes it difficult for users to know which section of +which manpage to read in order to solve their problem. Tom +Christiansen has done much of the documentation work in the past. + +=head2 A clear division into tutorial and reference + +Some manpages (e.g., perltoot and perlreftut) clearly set out to +educate the reader about a subject. Other manpages (e.g., perlsub) +are references for which there is no tutorial, or are references with +a slight tutorial bent. If things are either tutorial or reference, +then the reader knows which manpage to read to learn about a subject, +and which manpage to read to learn all about an aspect of that +subject. Part of the solution to this is: + +=head2 Remove the artificial distinction between operators and functions + +History shows us that users, and often porters, aren't clear on the +operator-function distinction. The present split in reference +material between perlfunc and perlop hinders user navigation. Given +that perlfunc is by far the larger of the two, move operator reference +into perlfunc. + +=head2 More tutorials + +More documents of a tutorial nature could help. Here are some +candidates: + +=over 4 + +=item Regular expressions + +Robin Berjon (r.berjon@ltconsulting.net) has volunteered. + +=item I/O + +Mark-Jason Dominus (mjd@plover.com) has an outline for perliotut. 
+ +=item pack/unpack + +This is badly needed. There has been some discussion on the +subject on perl5-porters. + +=item Debugging + +Ronald Kimball (rjk@linguist.dartmouth.edu) has volunteered. + +=back + +=head2 Include a search tool + +perldoc should be able to 'grep' fulltext indices of installed POD +files. This would let people say: + + perldoc -find printing numbers with commas + +and get back the perlfaq entry on 'commify'. + +This solution, however, requires documentation to contain the keywords +the user is searching for. Even when the users know what they're +looking for, often they can't spell it. + +=head2 Include a locate tool + +perldoc should be able to help people find the manpages on a +particular high-level subject: + + perldoc -find web + +would tell them manpages, web pages, and books with material on web +programming. Similarly C<perldoc -find databases>, C<perldoc -find +references> and so on. + +We need something in the vicinity of: + + % perl -help random stuff + No documentation for perl function `random stuff' found + The following entry in perlfunc.pod matches /random/a: + =item rand EXPR + + =item rand + + Returns a random fractional number greater than or equal to C<0> and less + than the value of EXPR. (EXPR should be positive.) If EXPR is + omitted, the value C<1> is used. Automatically calls C<srand()> unless + C<srand()> has already been called. See also C<srand()>. + + (Note: If your rand function consistently returns numbers that are too + large or too small, then your version of Perl was probably compiled + with the wrong number of RANDBITS.) 
+ The following pod pages seem to have /stuff/a: + perlfunc.pod (7 hits) + perlfaq7.pod (6 hits) + perlmod.pod (4 hits) + perlsyn.pod (3 hits) + perlfaq8.pod (2 hits) + perlipc.pod (2 hits) + perl5004delta.pod (1 hit) + perl5005delta.pod (1 hit) + perlcall.pod (1 hit) + perldelta.pod (1 hit) + perlfaq3.pod (1 hit) + perlfaq5.pod (1 hit) + perlhist.pod (1 hit) + perlref.pod (1 hit) + perltoc.pod (1 hit) + perltrap.pod (1 hit) + Proceed to open perlfunc.pod? [y] n + Do you want to speak perl interactively? [y] n + Should I dial 911? [y] n + Do you need psychiatric help? [y] y + <PELIZA> Hi, what bothers you today? + A Python programmer in the next cubby is driving me nuts! + <PELIZA> Hmm, thats fixable. Just [rest censored] + +=head2 Separate function manpages by default + +Perl should install 'manpages' for every function/operator into the +3pl or 3p manual section. By default. The splitman program in the +Perl source distribution does the work of turning big perlfunc into +little 3p pages. + +=head2 Users can't find the manpages + +Make C<perldoc> tell users what they need to add to their .login or +.cshrc to set their MANPATH correctly. + +=head2 Install ALL Documentation + +Make the standard documentation kit include the VMS, OS/2, Win32, +Threads, etc information. installperl and pod/Makefile should know +enough to copy README.foo to perlfoo.pod before building everything, +when appropriate. + +=head2 Outstanding issues to be documented + +Tom has a list of 5.005_5* features or changes that require +documentation. + +Create one document that coherently explains the delta between the +last camel release and the current release. perldelta was supposed +to be that, but no longer. The things in perldelta never seemed to +get placed in the right places in the real manpages, either. This +needs work. + +=head2 Adapt www.linuxhq.com for Perl + +This should help glorify documentation and get more people involved in +perl development. 
+ +=head2 Replace man with a perl program + +Can we reimplement man in Perl? Tom has a start. I believe some of +the Linux systems distribute a manalike. Alternatively, build on +perldoc to remove the unfeatures like "is slow" and "has no apropos". + +=head2 Unicode tutorial + +We could use more work on helping people understand Perl's new +Unicode support that Larry has created. + +=head1 Modules + +=head2 Update the POSIX extension to conform with the POSIX 1003.1 Edition 2 + +The current state of the POSIX extension is as of Edition 1, 1991, +whereas the Edition 2 came out in 1996. ISO/IEC 9945:1-1996(E), +ANSI/IEEE Std 1003.1, 1996 Edition. ISBN 1-55937-573-6. The updates +were legion: threads, IPC, and real time extensions. + +=head2 Module versions + +Automate the checking of versions in the standard distribution so +it's easy for a pumpking to check whether CPAN has a newer version +that we should be including? + +=head2 New modules + +Which modules should be added to the standard distribution? This ties +in with the SDK discussed on the perl-sdk list at perl.org. + +=head2 Profiler + +Make the profiler (Devel::DProf) part of the standard release, and +document it well. + +=head2 Tie Modules + +=over 4 + +=item VecArray + +Implement array using vec(). Nathan Torkington has working code to +do this. + +=item SubstrArray + +Implement array using substr() + +=item VirtualArray + +Implement array using a file + +=item ShiftSplice + +Defines shift et al in terms of splice method + +=back + +=head2 Procedural options + +Support procedural interfaces for the common cases of Perl's +gratuitously OOO modules. Tom objects to "use IO::File" reading many +thousands of lines of code. + +=head2 RPC + +Write a module for transparent, portable remote procedure calls. (Not +core). This touches on the CORBA and ILU work. 
+ +=head2 y2k localtime/gmtime + +Write a module, Y2k::Catch, which overloads localtime and gmtime's +returned year value and catches "bad" attempts to use it. + +=head2 Export File::Find variables + +Make File::Find export C<$name> etc manually, at least if asked to. + +=head2 Ioctl + +Finish a proper Ioctl module. + +=head2 Debugger attach/detach + +Permit a user to debug an already-running program. + +=head2 Regular Expression debugger + +Create a visual profiler/debugger tool that steps you through the +execution of a regular expression point by point. Ilya has a module +to color-code and display regular expression parses and executions. +There's something at http://tkworld.org/ that might be a good start, +it's a Tk/Tcl RE wizard, that builds regexen of many flavours. + +=head2 Alternative RE Syntax + +Make an alternative regular expression syntax that is accessed through +a module. For instance, + + use RE; + $re = start_of_line() + ->literal("1998/10/08") + ->optional( whitespace() ) + ->literal("[") + ->remember( many( or( "-", digit() ) ) ); + + if (/$re/) { + print "time is $1\n"; + } + +Newbies to regular expressions typically only use a subset of the full +language. Perhaps you wouldn't have to implement the full feature set. + +=head2 Bundled modules + +Nicholas Clark (nick@flirble.org) had a patch for storing modules in +zipped format. This needs exploring and concluding. + +=head2 Expect + +Adopt IO::Tty, make it as portable as Don Libes' "expect" (can we link +against expect code?), and perfect a Perl version of expect. IO::Tty +and expect could then be distributed as part of the core distribution, +replacing Comm.pl and other hacks. + +=head2 GUI::Native + +A simple-to-use interface to native graphical abilities would +be welcomed. Oh, Perl's access to Tk is nice enough, and reasonably +portable, but it's not particularly as fast as one would like. 
+Simple access to the mouse's cut buffer or mouse-presses shouldn't +require loading a few terabytes of Tk code. + +=head2 Update semibroken auxiliary tools; h2ph, a2p, etc. + +Kurt Starsinic is working on h2ph. mjd has fixed bugs in a2p in the +past. a2p apparently doesn't work on nawk and gawk extensions. +Graham Barr has an Include module that does h2ph work at runtime. + +=head2 POD Converters + +Brad's PodParser code needs to become part of the core, and the Pod::* +and pod2* programs rewritten to use this standard parser. Currently +the converters take different options, some behave in different +fashions, and some are more picky than others in terms of the POD +files they accept. + +=head2 pod2html + +A short-term fix: pod2html generates absolute HTML links. Make it +generate relative links. + +=head2 Podchecker + +Something like lint for Pod would be good. Something that catches +common errors as well as gross ones. Brad Appleton is putting +together something as part of his PodParser work. + +=head1 Tom's Wishes + +=head2 Webperl + +Design a webperl environment that's as tightly integrated and as +easy-to-use as Perl's current command-line environment. + +=head2 Mobile agents + +More work on a safe and secure execution environment for mobile +agents would be neat; the Safe.pm module is a start, but there's +still a lot to be done in that area. Adopt Penguin? + +=head2 POSIX on non-POSIX + +Standard programming constructs for non-POSIX systems would help a +lot of programmers stuck on primitive, legacy systems. For example, +Microsoft still hasn't made a usable POSIX interface on their clunky +systems, which means that standard operations such as alarm() and +fork(), both critical for sophisticated client-server programming, +must both be kludged around. + +I'm unsure whether Tom means to emulate alarm() and fork(), or merely +to provide a document like perlport.pod to say which features are +portable and which are not. 
+ +=head2 Portable installations + +Figure out a portable semi-gelled installation, that is, one without +full paths. Larry has said that he's thinking about this. Ilya +pointed out that perllib_mangle() is good for this. + +=head1 Win32 Stuff + +=head2 Rename new headers to be consistent with the rest + +=head2 Sort out the spawnvp() mess + +=head2 Work out DLL versioning + +=head2 Style-check + +=head1 Would be nice to have + +=over 4 + +=item C<pack "(stuff)*"> + +=item Contiguous bitfields in pack/unpack + +=item lexperl + +=item Bundled perl preprocessor + +=item Use posix calls internally where possible + +=item format BOTTOM + +=item -i rename file only when successfully changed + +=item All ARGV input should act like <> + +=item report HANDLE [formats]. + +=item support in perlmain to rerun debugger + +=item lvalue functions + +Tuomas Lukka, on behalf of the PDL project, greatly desires this and +Ilya has a patch for it (probably against an older version of Perl). +Tuomas points out that what PDL really wants is lvalue I<methods>, +not just subs. + +=back + +=head1 Possible pragmas + +=head2 'less' + +(use less memory, CPU) + +=head1 Optimizations + +=head2 constant function cache + +=head2 foreach(reverse...) + +=head2 Cache eval tree + +Unless lexical outer scope used (mark in &compiling?). + +=head2 rcatmaybe + +=head2 Shrink opcode tables + +Via multiple implementations selected in peep. + +=head2 Cache hash value + +Not a win, according to Guido. + +=head2 Optimize away @_ where possible + +=head2 Optimize sort by { $a <=> $b } + +Greg Bacon added several more sort optimizations. These have +made it into 5.005_55, thanks to Hans Mulder. + +=head2 Rewrite regexp parser for better integrated optimization + +The regexp parser was rewritten for 5.005. Ilya's the regexp guru. + +=head1 Vague possibilities + +=over 4 + +=item ref function in list context + +This seems impossible to do without substantially breaking code. 
+ +=item make tr/// return histogram in list context? + +=item Loop control on do{} et al + +=item Explicit switch statements + +Nobody has yet managed to come up with a switch syntax that would +allow for mixed hash, constant, regexp checks. Submit implementation +with syntax, please. + +=item compile to real threaded code + +=item structured types + +=item Modifiable $1 et al + +The intent is for this to be a means of editing the matched portions of +the target string. + +=back + +=head1 To Do Or Not To Do + +These are things that have been discussed in the past and roundly +criticized for being of questionable value. + +=head2 Making my() work on "package" variables + +Being able to say my($Foo::Bar), something that sounds ludicrous and +the 5.6 pumpking has mocked. + +=head2 "or" testing defined not truth + +We tell people that C<||> can be used to give a default value to a +variable: + + $children = shift || 5; # default is 5 children + +which is almost (but not): + + $children = shift; + $children = 5 unless $children; + +but if the first argument was given and is "0", then it will be +considered false by C<||> and C<5> used instead. Really we want +an C<||>-like operator that behaves like: + + $children = shift; + $children = 5 unless defined $children; + +Namely, a C<||> that tests defined-ness rather than truth. One was +discussed, and a patch submitted, but the objections were many. While +there were objections, many still feel the need. At least it was +decided that C<??> is the best name for the operator. + +=head2 "dynamic" lexicals + + my $x; + sub foo { + local $x; + } + +Localizing, as Tim Bunce points out, is a separate concept from +whether the variable is global or lexical. Chip Salzenberg had +an implementation once, but Larry thought it had potential to +confuse. 
+ +=head2 "class"-based, rather than package-based "lexicals" + +This is like what the Alias module provides, but the variables would +be lexicals reserved by perl at compile-time, which really are indices +pointing into the pseudo-hash object visible inside every method so +declared. + +=head1 Threading + +=head2 Modules + +Which of the standard modules are thread-safe? Which CPAN modules? +How easy is it to fix those non-safe modules? + +=head2 Testing + +Threading is still experimental. Every reproducible bug identifies +something else for us to fix. Find and submit more of these problems. + +=head2 $AUTOLOAD + +=head2 exit/die + +Consistent semantics for exit/die in threads. + +=head2 External threads + +Better support for externally created threads. + +=head2 Thread::Pool + +=head2 thread-safety + +Spot-check globals like statcache and global GVs for thread-safety. +"B<Part done>", says Sarathy. + +=head2 Per-thread GVs + +According to Sarathy, this would make @_ be the same in threaded +and non-threaded, as well as helping solve problems like filehandles +(the same filehandle currently cannot be used in two threads). + +=head1 Compiler + +=head2 Optimization + +The compiler's back-end code-generators for creating bytecode or +compilable C code could use optimization work. + +=head2 Byteperl + +Figure out how and where byteperl will be built for the various +platforms. + +=head2 Precompiled modules + +Save byte-compiled modules on disk. + +=head2 Executables + +Auto-produce executable. + +=head2 Typed lexicals + +Typed lexicals should affect B::CC::load_pad. + +=head2 Win32 + +Workarounds to help Win32 dynamic loading. + +=head2 END blocks + +END blocks need saving in compiled output, now that CHECK blocks +are available. + +=head2 _AUTOLOAD + +_AUTOLOAD prodding. + +=head2 comppadlist + +Fix comppadlist (names in comppad_name can have fake SvCUR +from where newASSIGNOP steals the field). + +=head2 Cached compilation + +Can we install modules as bytecode? 
+ +=head1 Recently Finished Tasks + +=head2 Figure a way out of $^(capital letter) + +Figure out a clean way to extend $^(capital letter) beyond +the 26 letters of the alphabet. (${^WORD} maybe?) + +Mark-Jason Dominus sent a patch which went into 5.005_56. + +=head2 Filenames + +Keep filenames in the distribution and in the standard module set +8.3 friendly where feasible. Good luck changing the standard +modules, though. + +=head2 Foreign lines + +Perl should be more generous in accepting foreign line terminations. +Mostly B<done> in 5.005. + +=head2 Namespace cleanup + + symbol-space: "pl_" prefix for all global vars + "Perl_" prefix for all functions + + CPP-space: stop malloc()/free() pollution unless asked + +=head2 ISA.pm + +Rename and alter ISA.pm. B<Done>. It is now base.pm. + +=head2 gettimeofday + +See Time::HiRes. + +=head2 autocroak? + +This is the Fatal.pm module, so any builtin that does +not return success automatically die()s. If you're feeling brave, tie +this in with the unified exceptions scheme. + +=cut diff --git a/contrib/perl5/pod/perltoot.pod b/contrib/perl5/pod/perltoot.pod index c77a971..31a7c76 100644 --- a/contrib/perl5/pod/perltoot.pod +++ b/contrib/perl5/pod/perltoot.pod @@ -111,8 +111,8 @@ by up-casing the hash keys: PEERS => [ "Norbert", "Rhys", "Phineas"], }; -And so you could get at C<$rec-E<gt>{NAME}> to find "Jason", or -C<@{ $rec-E<gt>{PEERS} }> to get at "Norbert", "Rhys", and "Phineas". +And so you could get at C<< $rec->{NAME} >> to find "Jason", or +C<< @{ $rec->{PEERS} } >> to get at "Norbert", "Rhys", and "Phineas". (Have you ever noticed how many 23-year-old programmers seem to be named "Jason" these days? :-) @@ -329,7 +329,7 @@ do more than fetch or set one particular field.
sub exclaim { my $self = shift; return sprintf "Hi, I'm %s, age %d, working with %s", - $self->{NAME}, $self->{AGE}, join(", ", $self->{PEERS}); + $self->{NAME}, $self->{AGE}, join(", ", @{$self->{PEERS}}); } Or maybe even one like this: @@ -542,7 +542,7 @@ and DESTROY methods as follows: } What happens if a derived class (which we'll call Employee) inherits -methods from this Person base class? Then C<Employee-E<gt>debug()>, when called +methods from this Person base class? Then C<< Employee->debug() >>, when called as a class method, manipulates $Person::Debugging not $Employee::Debugging. =head2 Class Destructors @@ -816,7 +816,7 @@ What do we mean by the Person::new() function -- isn't that actually a method? Well, in principle, yes. A method is just a function that expects as its first argument a class name (package) or object (blessed reference). Person::new() is the function that both the -C<Person-E<gt>new()> method and the C<Employee-E<gt>new()> method end +C<< Person->new() >> method and the C<< Employee->new() >> method end up calling. Understand that while a method call looks a lot like a function call, they aren't really quite the same, and if you treat them as the same, you'll very soon be left with nothing but broken programs. 
@@ -1124,8 +1124,7 @@ it happens when you say If you wanted to add version checking to your Person class explained above, just add this to Person.pm: - use vars qw($VERSION); - $VERSION = '1.1'; + our $VERSION = '1.1'; and then in Employee.pm could you can say @@ -1363,7 +1362,7 @@ constructor will look like when taking this approach: package Person; use Carp; - use vars qw($AUTOLOAD); # it's a package global + our $AUTOLOAD; # it's a package global my %fields = ( name => undef, @@ -1433,8 +1432,7 @@ Here's how to be careful: package Employee; use Person; use strict; - use vars qw(@ISA); - @ISA = qw(Person); + our @ISA = qw(Person); my %fields = ( id => undef, @@ -1560,16 +1558,15 @@ Here's the whole implementation: BEGIN { use Exporter (); - use vars qw(@EXPORT @EXPORT_OK %EXPORT_TAGS); - @EXPORT = qw(gethostbyname gethostbyaddr gethost); - @EXPORT_OK = qw( - $h_name @h_aliases - $h_addrtype $h_length - @h_addr_list $h_addr - ); - %EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] ); + our @EXPORT = qw(gethostbyname gethostbyaddr gethost); + our @EXPORT_OK = qw( + $h_name @h_aliases + $h_addrtype $h_length + @h_addr_list $h_addr + ); + our %EXPORT_TAGS = ( FIELDS => [ @EXPORT_OK, @EXPORT ] ); } - use vars @EXPORT_OK; + our @EXPORT_OK; # Class::Struct forbids use of @ISA sub import { goto &Exporter::import } @@ -1661,7 +1658,7 @@ update value fields in the hash. Convenient, eh? } use Alias qw(attr); - use vars qw($NAME $AGE $PEERS); + our ($NAME, $AGE, $PEERS); sub name { my $self = attr shift; @@ -1692,7 +1689,7 @@ update value fields in the hash. Convenient, eh? return ++$AGE; } -The need for the C<use vars> declaration is because what Alias does +The need for the C<our> declaration is because what Alias does is play with package globals with the same name as the fields. To use globals while C<use strict> is in effect, you have to predeclare them. 
These package variables are localized to the block enclosing the attr() diff --git a/contrib/perl5/pod/perltootc.pod b/contrib/perl5/pod/perltootc.pod new file mode 100644 index 0000000..64f8233 --- /dev/null +++ b/contrib/perl5/pod/perltootc.pod @@ -0,0 +1,1337 @@ +=head1 NAME + +perltootc - Tom's OO Tutorial for Class Data in Perl + +=head1 DESCRIPTION + +When designing an object class, you are sometimes faced with the situation +of wanting common state shared by all objects of that class. +Such I<class attributes> act somewhat like global variables for the entire +class, but unlike program-wide globals, class attributes have meaning only to +the class itself. + +Here are a few examples where class attributes might come in handy: + +=over + +=item * + +to keep a count of the objects you've created, or how many are +still extant. + +=item * + +to extract the name or file descriptor for a logfile used by a debugging +method. + +=item * + +to access collective data, like the total amount of cash dispensed by +all ATMs in a network in a given day. + +=item * + +to access the last object created by a class, or the most accessed object, +or to retrieve a list of all objects. + +=back + +Unlike a true global, class attributes should not be accessed directly. +Instead, their state should be inspected, and perhaps altered, only +through the mediated access of I<class methods>. These class attributes +accessor methods are similar in spirit and function to accessors used +to manipulate the state of instance attributes on an object. They provide a +clear firewall between interface and implementation. + +You should allow access to class attributes through either the class +name or any object of that class. If we assume that $an_object is of +type Some_Class, and the &Some_Class::population_count method accesses +class attributes, then these two invocations should both be possible, +and almost certainly equivalent. 
+ + Some_Class->population_count() + $an_object->population_count() + +The question is, where do you store the state which that method accesses? +Unlike more restrictive languages like C++, where these are called +static data members, Perl provides no syntactic mechanism to declare +class attributes, any more than it provides a syntactic mechanism to +declare instance attributes. Perl provides the developer with a broad +set of powerful but flexible features that can be uniquely crafted to +the particular demands of the situation. + +A class in Perl is typically implemented in a module. A module consists +of two complementary feature sets: a package for interfacing with the +outside world, and a lexical file scope for privacy. Either of these +two mechanisms can be used to implement class attributes. That means you +get to decide whether to put your class attributes in package variables +or to put them in lexical variables. + +And those aren't the only decisions to make. If you choose to use package +variables, you can make your class attribute accessor methods either ignorant +of inheritance or sensitive to it. If you choose lexical variables, +you can elect to permit access to them from anywhere in the entire file +scope, or you can limit direct data access exclusively to the methods +implementing those attributes. + +=head1 Class Data as Package Variables + +Because a class in Perl is really just a package, using package variables +to hold class attributes is the most natural choice. This makes it simple +for each class to have its own class attributes. Let's say you have a class +called Some_Class that needs a couple of different attributes that you'd +like to be global to the entire class. The simplest thing to do is to +use package variables like $Some_Class::CData1 and $Some_Class::CData2 +to hold these attributes. But we certainly don't want to encourage +outsiders to touch those data directly, so we provide methods +to mediate access. 
+ +In the accessor methods below, we'll for now just ignore the first +argument--that part to the left of the arrow on method invocation, which +is either a class name or an object reference. + + package Some_Class; + sub CData1 { + shift; # XXX: ignore calling class/object + $Some_Class::CData1 = shift if @_; + return $Some_Class::CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $Some_Class::CData2 = shift if @_; + return $Some_Class::CData2; + } + +This technique is highly legible and should be completely straightforward +to even the novice Perl programmer. By fully qualifying the package +variables, they stand out clearly when reading the code. Unfortunately, +if you misspell one of these, you've introduced an error that's hard +to catch. It's also somewhat disconcerting to see the class name itself +hard-coded in so many places. + +Both these problems can be easily fixed. Just add the C<use strict> +pragma, then pre-declare your package variables. (The C<our> operator +will be new in 5.6, and will work for package globals just like C<my> +works for scoped lexicals.) + + package Some_Class; + use strict; + our($CData1, $CData2); # our() is new to perl5.6 + sub CData1 { + shift; # XXX: ignore calling class/object + $CData1 = shift if @_; + return $CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $CData2 = shift if @_; + return $CData2; + } + + +As with any other global variable, some programmers prefer to start their +package variables with capital letters. This helps clarity somewhat, but +by no longer fully qualifying the package variables, their significance +can be lost when reading the code. You can fix this easily enough by +choosing better names than were used here. 
+ +=head2 Putting All Your Eggs in One Basket + +Just as the mindless enumeration of accessor methods for instance attributes +grows tedious after the first few (see L<perltoot>), so too does the +repetition begin to grate when listing out accessor methods for class +data. Repetition runs counter to the primary virtue of a programmer: +Laziness, here manifesting as that innate urge every programmer feels +to factor out duplicate code whenever possible. + +Here's what to do. First, make just one hash to hold all class attributes. + + package Some_Class; + use strict; + our %ClassData = ( # our() is new to perl5.6 + CData1 => "", + CData2 => "", + ); + +Using closures (see L<perlref>) and direct access to the package symbol +table (see L<perlmod>), now clone an accessor method for each key in +the %ClassData hash. Each of these methods is used to fetch or store +values to the specific, named class attribute. + + for my $datum (keys %ClassData) { + no strict "refs"; # to register new methods in package + *$datum = sub { + shift; # XXX: ignore calling class/object + $ClassData{$datum} = shift if @_; + return $ClassData{$datum}; + } + } + +It's true that you could work out a solution employing an &AUTOLOAD +method, but this approach is unlikely to prove satisfactory. Your +function would have to distinguish between class attributes and object +attributes; it could interfere with inheritance; and it would have to +be careful about DESTROY. Such complexity is uncalled for in most cases, +and certainly in this one. + +You may wonder why we're rescinding strict refs for the loop. We're +manipulating the package's symbol table to introduce new function names +using symbolic references (indirect naming), which the strict pragma +would otherwise forbid. Normally, symbolic references are a dodgy +notion at best. This isn't just because they can be used accidentally +when you aren't meaning to.
It's also because for most uses +to which beginning Perl programmers attempt to put symbolic references, +we have much better approaches, like nested hashes or hashes of arrays. +But there's nothing wrong with using symbolic references to manipulate +something that is meaningful only from the perspective of the package +symbol table, like method names or package variables. In other +words, when you want to refer to the symbol table, use symbol references. + +Clustering all the class attributes in one place has several advantages. +They're easy to spot, initialize, and change. The aggregation also +makes them convenient to access externally, such as from a debugger +or a persistence package. The only possible problem is that we don't +automatically know the name of each class's class object, should it have +one. This issue is addressed below in L<"The Eponymous Meta-Object">. + +=head2 Inheritance Concerns + +Suppose you have an instance of a derived class, and you access class +data using an inherited method call. Should that end up referring +to the base class's attributes, or to those in the derived class? +How would it work in the earlier examples? The derived class inherits +all the base class's methods, including those that access class attributes. +But what package are the class attributes stored in? + +The answer is that, as written, class attributes are stored in the package into +which those methods were compiled. When you invoke the &CData1 method +on the name of the derived class or on one of that class's objects, the +version shown above is still run, so you'll access $Some_Class::CData1--or +in the method cloning version, C<$Some_Class::ClassData{CData1}>. + +Think of these class methods as executing in the context of their base +class, not in that of their derived class. Sometimes this is exactly +what you want. If Feline subclasses Carnivore, then the population of +Carnivores in the world should go up when a new Feline is born.
+But what if you wanted to figure out how many Felines you have apart +from Carnivores? The current approach doesn't support that. + +You'll have to decide on a case-by-case basis whether it makes any sense +for class attributes to be package-relative. If you want it to be so, +then stop ignoring the first argument to the function. Either it will +be a package name if the method was invoked directly on a class name, +or else it will be an object reference if the method was invoked on an +object reference. In the latter case, the ref() function provides the +class of that object. + + package Some_Class; + sub CData1 { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $varname = $class . "::CData1"; + no strict "refs"; # to access package data symbolically + $$varname = shift if @_; + return $$varname; + } + +And then do likewise for all other class attributes (such as CData2, +etc.) that you wish to access as package variables in the invoking package +instead of the compiling package as we had previously. + +Once again we temporarily disable the strict references ban, because +otherwise we couldn't use the fully-qualified symbolic name for +the package global. This is perfectly reasonable: since all package +variables by definition live in a package, there's nothing wrong with +accessing them via that package's symbol table. That's what it's there +for (well, somewhat). + +What about just using a single hash for everything and then cloning +methods? What would that look like? The only difference would be the +closure used to produce new method entries for the class's symbol table. + + no strict "refs"; + *$datum = sub { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $varname = $class . 
"::ClassData"; + $varname->{$datum} = shift if @_; + return $varname->{$datum}; + } + +=head2 The Eponymous Meta-Object + +It could be argued that the %ClassData hash in the previous example is +neither the most imaginative nor the most intuitive of names. Is there +something else that might make more sense, be more useful, or both? + +As it happens, yes, there is. For the "class meta-object", we'll use +a package variable of the same name as the package itself. Within the +scope of a package Some_Class declaration, we'll use the eponymously +named hash %Some_Class as that class's meta-object. (Using an eponymously +named hash is somewhat reminiscent of classes that name their constructors +eponymously in the Python or C++ fashion. That is, class Some_Class would +use &Some_Class::Some_Class as a constructor, probably even exporting that +name as well. The StrNum class in Recipe 13.14 in I<The Perl Cookbook> +does this, if you're looking for an example.) + +This predictable approach has many benefits, including having a well-known +identifier to aid in debugging, transparent persistence, +or checkpointing. It's also the obvious name for monadic classes and +translucent attributes, discussed later. + +Here's an example of such a class. Notice how the name of the +hash storing the meta-object is the same as the name of the package +used to implement the class. 
+ + package Some_Class; + use strict; + + # create class meta-object using that most perfect of names + our %Some_Class = ( # our() is new to perl5.6 + CData1 => "", + CData2 => "", + ); + + # this accessor is calling-package-relative + sub CData1 { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + no strict "refs"; # to access eponymous meta-object + $class->{CData1} = shift if @_; + return $class->{CData1}; + } + + # but this accessor is not + sub CData2 { + shift; # XXX: ignore calling class/object + no strict "refs"; # to access eponymous meta-object + __PACKAGE__ -> {CData2} = shift if @_; + return __PACKAGE__ -> {CData2}; + } + +In the second accessor method, the __PACKAGE__ notation was used for +two reasons. First, to avoid hardcoding the literal package name +in the code in case we later want to change that name. Second, to +clarify to the reader that what matters here is the package currently +being compiled into, not the package of the invoking object or class. +If the long sequence of non-alphabetic characters bothers you, you can +always put the __PACKAGE__ in a variable first. + + sub CData2 { + shift; # XXX: ignore calling class/object + no strict "refs"; # to access eponymous meta-object + my $class = __PACKAGE__; + $class->{CData2} = shift if @_; + return $class->{CData2}; + } + +Even though we're using symbolic references for good not evil, some +folks tend to become unnerved when they see so many places with strict +ref checking disabled. Given a symbolic reference, you can always +produce a real reference (the reverse is not true, though). So we'll +create a subroutine that does this conversion for us. If invoked as a +function of no arguments, it returns a reference to the compiling class's +eponymous hash. Invoked as a class method, it returns a reference to +the eponymous hash of its caller. And when invoked as an object method, +this function returns a reference to the eponymous hash for whatever +class the object belongs to. 
+ + package Some_Class; + use strict; + + our %Some_Class = ( # our() is new to perl5.6 + CData1 => "", + CData2 => "", + ); + + # tri-natured: function, class method, or object method + sub _classobj { + my $obclass = shift || __PACKAGE__; + my $class = ref($obclass) || $obclass; + no strict "refs"; # to convert sym ref to real one + return \%$class; + } + + for my $datum (keys %{ _classobj() } ) { + # turn off strict refs so that we can + # register a method in the symbol table + no strict "refs"; + *$datum = sub { + use strict "refs"; + my $self = shift->_classobj(); + $self->{$datum} = shift if @_; + return $self->{$datum}; + } + } + +=head2 Indirect References to Class Data + +A reasonably common strategy for handling class attributes is to store +a reference to each package variable on the object itself. This is +a strategy you've probably seen before, such as in L<perltoot> and +L<perlbot>, but there may be variations in the example below that you +haven't thought of before. + + package Some_Class; + our($CData1, $CData2); # our() is new to perl5.6 + + sub new { + my $obclass = shift; + return bless my $self = { + ObData1 => "", + ObData2 => "", + CData1 => \$CData1, + CData2 => \$CData2, + } => (ref $obclass || $obclass); + } + + sub ObData1 { + my $self = shift; + $self->{ObData1} = shift if @_; + return $self->{ObData1}; + } + + sub ObData2 { + my $self = shift; + $self->{ObData2} = shift if @_; + return $self->{ObData2}; + } + + sub CData1 { + my $self = shift; + my $dataref = ref $self + ? $self->{CData1} + : \$CData1; + $$dataref = shift if @_; + return $$dataref; + } + + sub CData2 { + my $self = shift; + my $dataref = ref $self + ? $self->{CData2} + : \$CData2; + $$dataref = shift if @_; + return $$dataref; + } + +As written above, a derived class will inherit these methods, which +will consequently access package variables in the base class's package. +This is not necessarily expected behavior in all circumstances. 
Here's an +example that uses a variable meta-object, taking care to access the +proper package's data. + + package Some_Class; + use strict; + + our %Some_Class = ( # our() is new to perl5.6 + CData1 => "", + CData2 => "", + ); + + sub _classobj { + my $self = shift; + my $class = ref($self) || $self; + no strict "refs"; + # get (hard) ref to eponymous meta-object + return \%$class; + } + + sub new { + my $obclass = shift; + my $classobj = $obclass->_classobj(); + bless my $self = { + ObData1 => "", + ObData2 => "", + CData1 => \$classobj->{CData1}, + CData2 => \$classobj->{CData2}, + } => (ref $obclass || $obclass); + return $self; + } + + sub ObData1 { + my $self = shift; + $self->{ObData1} = shift if @_; + return $self->{ObData1}; + } + + sub ObData2 { + my $self = shift; + $self->{ObData2} = shift if @_; + return $self->{ObData2}; + } + + sub CData1 { + my $self = shift; + $self = $self->_classobj() unless ref $self; + my $dataref = $self->{CData1}; + $$dataref = shift if @_; + return $$dataref; + } + + sub CData2 { + my $self = shift; + $self = $self->_classobj() unless ref $self; + my $dataref = $self->{CData2}; + $$dataref = shift if @_; + return $$dataref; + } + +Not only are we now strict refs clean, using an eponymous meta-object +seems to make the code cleaner. Unlike the previous version, this one +does something interesting in the face of inheritance: it accesses the +class meta-object in the invoking class instead of the one into which +the method was initially compiled. + +You can easily access data in the class meta-object, making +it easy to dump the complete class state using an external mechanism such +as when debugging or implementing a persistent class. This works because +the class meta-object is a package variable, has a well-known name, and +clusters all its data together. (Transparent persistence +is not always feasible, but it's certainly an appealing idea.) 
+ +There's still no check that object accessor methods have not been +invoked on a class name. If strict ref checking is enabled, you'd +blow up. If not, then you get the eponymous meta-object. What you do +with--or about--this is up to you. The next two sections demonstrate +innovative uses for this powerful feature. + +=head2 Monadic Classes + +Some of the standard modules shipped with Perl provide class interfaces +without any attribute methods whatsoever. The most commonly used module +not numbered amongst the pragmata, the Exporter module, is a class with +neither constructors nor attributes. Its job is simply to provide a +standard interface for modules wishing to export part of their namespace +into that of their caller. Modules use the Exporter's &import method by +setting their inheritance list in their package's @ISA array to mention +"Exporter". But class Exporter provides no constructor, so you can't +have several instances of the class. In fact, you can't have any--it +just doesn't make any sense. All you get is its methods. Its interface +contains no statefulness, so state data is wholly superfluous. + +Another sort of class that pops up from time to time is one that supports +a unique instance. Such classes are called I<monadic classes>, or less +formally, I<singletons> or I<highlander classes>. + +If a class is monadic, where do you store its state, that is, +its attributes? How do you make sure that there's never more than +one instance? While you could merely use a slew of package variables, +it's a lot cleaner to use the eponymously named hash. 
Here's a complete +example of a monadic class: + + package Cosmos; + %Cosmos = (); + + # accessor method for "name" attribute + sub name { + my $self = shift; + $self->{name} = shift if @_; + return $self->{name}; + } + + # read-only accessor method for "birthday" attribute + sub birthday { + my $self = shift; + die "can't reset birthday" if @_; # XXX: croak() is better + return $self->{birthday}; + } + + # accessor method for "stars" attribute + sub stars { + my $self = shift; + $self->{stars} = shift if @_; + return $self->{stars}; + } + + # oh my - one of our stars just went out! + sub supernova { + my $self = shift; + my $count = $self->stars(); + $self->stars($count - 1) if $count > 0; + } + + # constructor/initializer method - fix by reboot + sub bigbang { + my $self = shift; + %$self = ( + name => "the world according to tchrist", + birthday => time(), + stars => 0, + ); + return $self; # yes, it's probably a class. SURPRISE! + } + + # After the class is compiled, but before any use or require + # returns, we start off the universe with a bang. + __PACKAGE__ -> bigbang(); + +Hold on, that doesn't look like anything special. Those attribute +accessors look no different than they would if this were a regular class +instead of a monadic one. The crux of the matter is there's nothing +that says that $self must hold a reference to a blessed object. It merely +has to be something you can invoke methods on. Here the package name +itself, Cosmos, works as an object. Look at the &supernova method. Is that +a class method or an object method? The answer is that static analysis +cannot reveal the answer. Perl doesn't care, and neither should you. +In the three attribute methods, C<%$self> is really accessing the %Cosmos +package variable. + +If like Stephen Hawking, you posit the existence of multiple, sequential, +and unrelated universes, then you can invoke the &bigbang method yourself +at any time to start everything all over again. 
You might think of +&bigbang as more of an initializer than a constructor, since the function +doesn't allocate new memory; it only initializes what's already there. +But like any other constructor, it does return a scalar value to use +for later method invocations. + +Imagine that some day in the future, you decide that one universe just +isn't enough. You could write a new class from scratch, but you already +have an existing class that does what you want--except that it's monadic, +and you want more than just one cosmos. + +That's what code reuse via subclassing is all about. Look how short +the new code is: + + package Multiverse; + use Cosmos; + @ISA = qw(Cosmos); + + sub new { + my $protoverse = shift; + my $class = ref($protoverse) || $protoverse; + my $self = {}; + return bless($self, $class)->bigbang(); + } + 1; + +Because we were careful to be good little creators when we designed our +Cosmos class, we can now reuse it without touching a single line of code +when it comes time to write our Multiverse class. The same code that +worked when invoked as a class method continues to work perfectly well +when invoked against separate instances of a derived class. + +The astonishing thing about the Cosmos class above is that the value +returned by the &bigbang "constructor" is not a reference to a blessed +object at all. It's just the class's own name. A class name is, for +virtually all intents and purposes, a perfectly acceptable object. +It has state, behavior, and identity, the three crucial components +of an object system. It even manifests inheritance, polymorphism, +and encapsulation. And what more can you ask of an object? + +To understand object orientation in Perl, it's important to recognize the +unification of what other programming languages might think of as class +methods and object methods into just plain methods.
"Class methods" +and "object methods" are distinct only in the compartmentalizing mind +of the Perl programmer, not in the Perl language itself. + +Along those same lines, a constructor is nothing special either, which +is one reason why Perl has no pre-ordained name for them. "Constructor" +is just an informal term loosely used to describe a method that returns +a scalar value that you can make further method calls against. So long +as it's either a class name or an object reference, that's good enough. +It doesn't even have to be a reference to a brand new object. + +You can have as many--or as few--constructors as you want, and you can +name them whatever you care to. Blindly and obediently using new() +for each and every constructor you ever write is to speak Perl with +such a severe C++ accent that you do a disservice to both languages. +There's no reason to insist that each class have but one constructor, +or that that constructor be named new(), or that that constructor be +used solely as a class method and not an object method. + +The next section shows how useful it can be to further distance ourselves +from any formal distinction between class method calls and object method +calls, both in constructors and in accessor methods. + +=head2 Translucent Attributes + +A package's eponymous hash can be used for more than just containing +per-class, global state data. It can also serve as a sort of template +containing default settings for object attributes. These default +settings can then be used in constructors for initialization of a +particular object. The class's eponymous hash can also be used to +implement I<translucent attributes>. A translucent attribute is one +that has a class-wide default. Each object can set its own value for the +attribute, in which case C<< $object->attribute() >> returns that value. +But if no value has been set, then C<< $object->attribute() >> returns +the class-wide default. 
+ +We'll apply something of a copy-on-write approach to these translucent +attributes. If you're just fetching values from them, you get +translucency. But if you store a new value to them, that new value is +set on the current object. On the other hand, if you use the class as +an object and store the attribute value directly on the class, then the +meta-object's value changes, and later fetch operations on objects with +uninitialized values for those attributes will retrieve the meta-object's +new values. Objects with their own initialized values, however, won't +see any change. + +Let's look at some concrete examples of using these properties before we +show how to implement them. Suppose that a class named Vermin +had a translucent data attribute called "color". First you set the color +in the meta-object, then you create three objects using a constructor +that happens to be named &spawn. + + use Vermin; + Vermin->color("vermilion"); + + $ob1 = Vermin->spawn(); # so that's where Jedi come from + $ob2 = Vermin->spawn(); + $ob3 = Vermin->spawn(); + + print $ob3->color(); # prints "vermilion" + +Each of these objects' colors is now "vermilion", because that's the +meta-object's value for that attribute, and these objects do not have +individual color values set. + +Changing the attribute on one object has no effect on other objects +previously created. + + $ob3->color("chartreuse"); + print $ob3->color(); # prints "chartreuse" + print $ob1->color(); # prints "vermilion", translucently + +If you now use $ob3 to spawn off another object, the new object will +take the color its parent held, which now happens to be "chartreuse". +That's because the constructor uses the invoking object as its template +for initializing attributes. When that invoking object is the +class name, the object used as a template is the eponymous meta-object. +When the invoking object is a reference to an instantiated object, the +&spawn constructor uses that existing object as a template. 
+ + $ob4 = $ob3->spawn(); # $ob3 now template, not %Vermin + print $ob4->color(); # prints "chartreuse" + +Any actual values set on the template object will be copied to the +new object. But attributes undefined in the template object, being +translucent, will remain undefined and consequently translucent in the +new one as well. + +Now let's change the color attribute on the entire class: + + Vermin->color("azure"); + print $ob1->color(); # prints "azure" + print $ob2->color(); # prints "azure" + print $ob3->color(); # prints "chartreuse" + print $ob4->color(); # prints "chartreuse" + +That color change took effect only in the first pair of objects, which +were still translucently accessing the meta-object's values. The second +pair had per-object initialized colors, and so didn't change. + +One important question remains. Changes to the meta-object are reflected +in translucent attributes in the entire class, but what about +changes to discrete objects? If you change the color of $ob3, does the +value of $ob4 see that change? Or vice-versa. If you change the color +of $ob4, does then the value of $ob3 shift? + + $ob3->color("amethyst"); + print $ob3->color(); # prints "amethyst" + print $ob4->color(); # hmm: "chartreuse" or "amethyst"? + +While one could argue that in certain rare cases it should, let's not +do that. Good taste aside, we want the answer to the question posed in +the comment above to be "chartreuse", not "amethyst". So we'll treat +these attributes similar to the way process attributes like environment +variables, user and group IDs, or the current working directory are +treated across a fork(). You can change only yourself, but you will see +those changes reflected in your unspawned children. Changes to one object +will propagate neither up to the parent nor down to any existing child objects. +Those objects made later, however, will see the changes. 
+ +If you have an object with an actual attribute value, and you want to +make that object's attribute value translucent again, what do you do? +Let's design the class so that when you invoke an accessor method with +C<undef> as its argument, that attribute returns to translucency. + + $ob4->color(undef); # back to "azure" + +Here's a complete implementation of Vermin as described above. + + package Vermin; + + # here's the class meta-object, eponymously named. + # it holds all class attributes, and also all instance attributes + # so the latter can be used for both initialization + # and translucency. + + our %Vermin = ( # our() is new to perl5.6 + PopCount => 0, # capital for class attributes + color => "beige", # small for instance attributes + ); + + # constructor method + # invoked as class method or object method + sub spawn { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $self = {}; + bless($self, $class); + $class->{PopCount}++; + # init fields from invoking object, or omit if + # invoking object is the class to provide translucency + %$self = %$obclass if ref $obclass; + return $self; + } + + # translucent accessor for "color" attribute + # invoked as class method or object method + sub color { + my $self = shift; + my $class = ref($self) || $self; + + # handle class invocation + unless (ref $self) { + $class->{color} = shift if @_; + return $class->{color} + } + + # handle object invocation + $self->{color} = shift if @_; + if (defined $self->{color}) { # not exists! 
+ return $self->{color}; + } else { + return $class->{color}; + } + } + + # accessor for "PopCount" class attribute + # invoked as class method or object method + # but uses object solely to locate meta-object + sub population { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + return $class->{PopCount}; + } + + # instance destructor + # invoked only as object method + sub DESTROY { + my $self = shift; + my $class = ref $self; + $class->{PopCount}--; + } + +Here are a couple of helper methods that might be convenient. They aren't +accessor methods at all. They're used to detect accessibility of data +attributes. The &is_translucent method determines whether a particular +object attribute is coming from the meta-object. The &has_attribute +method detects whether a class implements a particular property at all. +It could also be used to distinguish undefined properties from non-existent +ones. + + # detect whether an object attribute is translucent + # (typically?) invoked only as object method + sub is_translucent { + my($self, $attr) = @_; + return !defined $self->{$attr}; + } + + # test for presence of attribute in class + # invoked as class method or object method + sub has_attribute { + my($self, $attr) = @_; + my $class = ref $self if $self; + return exists $class->{$attr}; + } + +If you prefer to install your accessors more generically, you can make +use of the upper-case versus lower-case convention to register into the +package appropriate methods cloned from generic closures. + + for my $datum (keys %{ +__PACKAGE__ }) { + *$datum = ($datum =~ /^[A-Z]/) + ? sub { # install class accessor + my $obclass = shift; + my $class = ref($obclass) || $obclass; + return $class->{$datum}; + } + : sub { # install translucent accessor + my $self = shift; + my $class = ref($self) || $self; + unless (ref $self) { + $class->{$datum} = shift if @_; + return $class->{$datum} + } + $self->{$datum} = shift if @_; + return defined $self->{$datum} + ? 
$self -> {$datum} + : $class -> {$datum} + } + } + +Translations of this closure-based approach into C++, Java, and Python +have been left as exercises for the reader. Be sure to send us mail as +soon as you're done. + +=head1 Class Data as Lexical Variables + +=head2 Privacy and Responsibility + +Unlike conventions used by some Perl programmers, in the previous +examples, we didn't prefix the package variables used for class attributes +with an underscore, nor did we do so for the names of the hash keys used +for instance attributes. You don't need little markers on data names to +suggest nominal privacy on attribute variables or hash keys, because these +are B<already> notionally private! Outsiders have no business whatsoever +playing with anything within a class save through the mediated access of +its documented interface; in other words, through method invocations. +And not even through just any method, either. Methods that begin with +an underscore are traditionally considered off-limits outside the class. +If outsiders skip the documented method interface to poke around the +internals of your class and end up breaking something, that's not your +fault--it's theirs. + +Perl believes in individual responsibility rather than mandated control. +Perl respects you enough to let you choose your own preferred level of +pain, or of pleasure. Perl believes that you are creative, intelligent, +and capable of making your own decisions--and fully expects you to +take complete responsibility for your own actions. In a perfect world, +these admonitions alone would suffice, and everyone would be intelligent, +responsible, happy, and creative. And careful. One probably shouldn't +forget careful, and that's a good bit harder to expect. Even Einstein +would take wrong turns by accident and end up lost in the wrong part +of town. + +Some folks get the heebie-jeebies when they see package variables +hanging out there for anyone to reach over and alter them. 
Some folks +live in constant fear that someone somewhere might do something wicked. +The solution to that problem is simply to fire the wicked, of course. +But unfortunately, it's not as simple as all that. These cautious +types are also afraid that they or others will do something not so +much wicked as careless, whether by accident or out of desperation. +If we fire everyone who ever gets careless, pretty soon there won't be +anybody left to get any work done. + +Whether it's needless paranoia or sensible caution, this uneasiness can +be a problem for some people. We can take the edge off their discomfort +by providing the option of storing class attributes as lexical variables +instead of as package variables. The my() operator is the source of +all privacy in Perl, and it is a powerful form of privacy indeed. + +It is widely perceived, and indeed has often been written, that Perl +provides no data hiding, that it affords the class designer no privacy +nor isolation, merely a rag-tag assortment of weak and unenforcible +social conventions instead. This perception is demonstrably false and +easily disproven. In the next section, we show how to implement forms +of privacy that are far stronger than those provided in nearly any +other object-oriented language. + +=head2 File-Scoped Lexicals + +A lexical variable is visible only through the end of its static scope. +That means that the only code able to access that variable is code +residing textually below the my() operator through the end of its block +if it has one, or through the end of the current file if it doesn't. + +Starting again with our simplest example given at the start of this +document, we replace our() variables with my() versions. 
+ + package Some_Class; + my($CData1, $CData2); # file scope, not in any package + sub CData1 { + shift; # XXX: ignore calling class/object + $CData1 = shift if @_; + return $CData1; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $CData2 = shift if @_; + return $CData2; + } + +So much for that old $Some_Class::CData1 package variable and its brethren! +Those are gone now, replaced with lexicals. No one outside the +scope can reach in and alter the class state without resorting to the +documented interface. Not even subclasses or superclasses of +this one have unmediated access to $CData1. They have to invoke the &CData1 +method against Some_Class or an instance thereof, just like anybody else. + +To be scrupulously honest, that last statement assumes you haven't packed +several classes together into the same file scope, nor strewn your class +implementation across several different files. Accessibility of those +variables is based uniquely on the static file scope. It has nothing to +do with the package. That means that code in a different file but +the same package (class) could not access those variables, yet code in the +same file but a different package (class) could. There are sound reasons +why we usually suggest a one-to-one mapping between files and packages +and modules and classes. You don't have to stick to this suggestion if +you really know what you're doing, but you're apt to confuse yourself +otherwise, especially at first. + +If you'd like to aggregate your class attributes into one lexically scoped, +composite structure, you're perfectly free to do so. 
+ + package Some_Class; + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + sub CData1 { + shift; # XXX: ignore calling class/object + $ClassData{CData1} = shift if @_; + return $ClassData{CData1}; + } + sub CData2 { + shift; # XXX: ignore calling class/object + $ClassData{CData2} = shift if @_; + return $ClassData{CData2}; + } + +To make this more scalable as other class attributes are added, we can +again register closures into the package symbol table to create accessor +methods for them. + + package Some_Class; + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + for my $datum (keys %ClassData) { + no strict "refs"; + *$datum = sub { + shift; # XXX: ignore calling class/object + $ClassData{$datum} = shift if @_; + return $ClassData{$datum}; + }; + } + +Requiring even your own class to use accessor methods like anybody else is +probably a good thing. But demanding and expecting that everyone else, +be they subclass or superclass, friend or foe, will all come to your +object through mediation is more than just a good idea. It's absolutely +critical to the model. Let there be in your mind no such thing as +"public" data, nor even "protected" data, which is a seductive but +ultimately destructive notion. Both will come back to bite at you. +That's because as soon as you take that first step out of the solid +position in which all state is considered completely private, save from the +perspective of its own accessor methods, you have violated the envelope. +And, having pierced that encapsulating envelope, you shall doubtless +someday pay the price when future changes in the implementation break +unrelated code. Considering that avoiding this infelicitous outcome was +precisely why you consented to suffer the slings and arrows of obsequious +abstraction by turning to object orientation in the first place, such +breakage seems unfortunate in the extreme. 
+ +=head2 More Inheritance Concerns + +Suppose that Some_Class were used as a base class from which to derive +Another_Class. If you invoke a &CData method on the derived class or +on an object of that class, what do you get? Would the derived class +have its own state, or would it piggyback on its base class's versions +of the class attributes? + +The answer is that under the scheme outlined above, the derived class +would B<not> have its own state data. As before, whether you consider +this a good thing or a bad one depends on the semantics of the classes +involved. + +The cleanest, sanest, simplest way to address per-class state in a +lexical is for the derived class to override its base class's version +of the method that accesses the class attributes. Since the actual method +called is the one in the object's derived class if this exists, you +automatically get per-class state this way. Any urge to provide an +unadvertised method to sneak out a reference to the %ClassData hash +should be strenuously resisted. + +As with any other overridden method, the implementation in the +derived class always has the option of invoking its base class's +version of the method in addition to its own. Here's an example: + + package Another_Class; + @ISA = qw(Some_Class); + + my %ClassData = ( + CData1 => "", + ); + + sub CData1 { + my($self, $newvalue) = @_; + if (@_ > 1) { + # set locally first + $ClassData{CData1} = $newvalue; + + # then pass the buck up to the first + # overridden version, if there is one + if ($self->can("SUPER::CData1")) { + $self->SUPER::CData1($newvalue); + } + } + return $ClassData{CData1}; + } + +Those dabbling in multiple inheritance might be concerned +about there being more than one override. + + for my $parent (@ISA) { + my $methname = $parent . 
"::CData1"; + if ($self->can($methname)) { + $self->$methname($newvalue); + } + } + +Because the &UNIVERSAL::can method returns a reference +to the function directly, you can use this directly +for a significant performance improvement: + + for my $parent (@ISA) { + if (my $coderef = $self->can($parent . "::CData1")) { + $self->$coderef($newvalue); + } + } + +=head2 Locking the Door and Throwing Away the Key + +As currently implemented, any code within the same scope as the +file-scoped lexical %ClassData can alter that hash directly. Is that +ok? Is it acceptable or even desirable to allow other parts of the +implementation of this class to access class attributes directly? + +That depends on how careful you want to be. Think back to the Cosmos +class. If the &supernova method had directly altered $Cosmos::Stars or +C<$Cosmos::Cosmos{stars}>, then we wouldn't have been able to reuse the +class when it came to inventing a Multiverse. So letting even the class +itself access its own class attributes without the mediating intervention of +properly designed accessor methods is probably not a good idea after all. + +Restricting access to class attributes from the class itself is usually +not enforcible even in strongly object-oriented languages. But in Perl, +you can. + +Here's one way: + + package Some_Class; + + { # scope for hiding $CData1 + my $CData1; + sub CData1 { + shift; # XXX: unused + $CData1 = shift if @_; + return $CData1; + } + } + + { # scope for hiding $CData2 + my $CData2; + sub CData2 { + shift; # XXX: unused + $CData2 = shift if @_; + return $CData2; + } + } + +No one--absolutely no one--is allowed to read or write the class +attributes without the mediation of the managing accessor method, since +only that method has access to the lexical variable it's managing. +This use of mediated access to class attributes is a form of privacy far +stronger than most OO languages provide. 
+ +The repetition of code used to create per-datum accessor methods chafes +at our Laziness, so we'll again use closures to create similar +methods. + + package Some_Class; + + { # scope for ultra-private meta-object for class attributes + my %ClassData = ( + CData1 => "", + CData2 => "", + ); + + for my $datum (keys %ClassData ) { + no strict "refs"; + *$datum = sub { + use strict "refs"; + my ($self, $newvalue) = @_; + $ClassData{$datum} = $newvalue if @_ > 1; + return $ClassData{$datum}; + } + } + + } + +The closure above can be modified to take inheritance into account using +the &UNIVERSAL::can method and SUPER as shown previously. + +=head2 Translucency Revisited + +The Vermin class demonstrates translucency using a package variable, +eponymously named %Vermin, as its meta-object. If you prefer to +use absolutely no package variables beyond those necessary to appease +inheritance or possibly the Exporter, this strategy is closed to you. +That's too bad, because translucent attributes are an appealing +technique, so it would be valuable to devise an implementation using +only lexicals. + +There's a second reason why you might wish to avoid the eponymous +package hash. If you use class names with double-colons in them, you +would end up poking around somewhere you might not have meant to poke. + + package Vermin; + $class = "Vermin"; + $class->{PopCount}++; + # accesses $Vermin::Vermin{PopCount} + + package Vermin::Noxious; + $class = "Vermin::Noxious"; + $class->{PopCount}++; + # accesses $Vermin::Noxious{PopCount} + +In the first case, because the class name had no double-colons, we got +the hash in the current package. But in the second case, instead of +getting some hash in the current package, we got the hash %Noxious in +the Vermin package. (The noxious vermin just invaded another package and +sprayed their data around it. 
:-) Perl doesn't support relative packages +in its naming conventions, so any double-colons trigger a fully-qualified +lookup instead of just looking in the current package. + +In practice, it is unlikely that the Vermin class had an existing +package variable named %Noxious that you just blew away. If you're +still mistrustful, you could always stake out your own territory +where you know the rules, such as using Eponymous::Vermin::Noxious or +Hieronymus::Vermin::Boschious or Leave_Me_Alone::Vermin::Noxious as class +names instead. Sure, it's in theory possible that someone else has +a class named Eponymous::Vermin with its own %Noxious hash, but this +kind of thing is always true. There's no arbiter of package names. +It's always the case that globals like @Cwd::ISA would collide if more +than one class uses the same Cwd package. + +If this still leaves you with an uncomfortable twinge of paranoia, +we have another solution for you. There's nothing that says that you +have to have a package variable to hold a class meta-object, either for +monadic classes or for translucent attributes. Just code up the methods +so that they access a lexical instead. + +Here's another implementation of the Vermin class with semantics identical +to those given previously, but this time using no package variables. + + package Vermin; + + + # Here's the class meta-object, eponymously named. + # It holds all class data, and also all instance data + # so the latter can be used for both initialization + # and translucency. it's a template. 
+ my %ClassData = ( + PopCount => 0, # capital for class attributes + color => "beige", # small for instance attributes + ); + + # constructor method + # invoked as class method or object method + sub spawn { + my $obclass = shift; + my $class = ref($obclass) || $obclass; + my $self = {}; + bless($self, $class); + $ClassData{PopCount}++; + # init fields from invoking object, or omit if + # invoking object is the class to provide translucency + %$self = %$obclass if ref $obclass; + return $self; + } + + # translucent accessor for "color" attribute + # invoked as class method or object method + sub color { + my $self = shift; + + # handle class invocation + unless (ref $self) { + $ClassData{color} = shift if @_; + return $ClassData{color} + } + + # handle object invocation + $self->{color} = shift if @_; + if (defined $self->{color}) { # not exists! + return $self->{color}; + } else { + return $ClassData{color}; + } + } + + # class attribute accessor for "PopCount" attribute + # invoked as class method or object method + sub population { + return $ClassData{PopCount}; + } + + # instance destructor; invoked only as object method + sub DESTROY { + $ClassData{PopCount}--; + } + + # detect whether an object attribute is translucent + # (typically?) invoked only as object method + sub is_translucent { + my($self, $attr) = @_; + $self = \%ClassData if !ref $self; + return !defined $self->{$attr}; + } + + # test for presence of attribute in class + # invoked as class method or object method + sub has_attribute { + my($self, $attr) = @_; + return exists $ClassData{$attr}; + } + +=head1 NOTES + +Inheritance is a powerful but subtle device, best used only after careful +forethought and design. Aggregation instead of inheritance is often a +better approach. + +We use the hypothetical our() syntax for package variables. It works +like C<use vars>, but looks like my(). It should be in this summer's +major release (5.6) of perl--we hope. 
+ +You can't use file-scoped lexicals in conjunction with the SelfLoader +or the AutoLoader, because they alter the lexical scope in which the +module's methods wind up getting compiled. + +The usual mealy-mouthed package-mungeing doubtless applies to setting +up names of object attributes. For example, C<< $self->{ObData1} >> +should probably be C<< $self->{ __PACKAGE__ . "_ObData1" } >>, but that +would just confuse the examples. + +=head1 SEE ALSO + +L<perltoot>, L<perlobj>, L<perlmod>, and L<perlbot>. + +The Tie::SecureHash module from CPAN is worth checking out. + +=head1 AUTHOR AND COPYRIGHT + +Copyright (c) 1999 Tom Christiansen. +All rights reserved. + +When included as part of the Standard Version of Perl, or as part of +its complete documentation whether printed or otherwise, this work +may be distributed only under the terms of Perl's Artistic License. +Any distribution of this file or derivatives thereof I<outside> +of that package require that special arrangements be made with +copyright holder. + +Irrespective of its distribution, all code examples in this file +are hereby placed into the public domain. You are permitted and +encouraged to use this code in your own programs for fun +or for profit as you see fit. A simple comment in the code giving +credit would be courteous but is not required. + +=head1 ACKNOWLEDGEMENTS + +Russ Allbery, Jon Orwant, Randy Ray, Larry Rosler, Nat Torkington, +and Stephen Warren all contributed suggestions and corrections to this +piece. Thanks especially to Damian Conway for his ideas and feedback, +and without whose indirect prodding I might never have taken the time +to show others how much Perl has to offer in the way of objects once +you start thinking outside the tiny little box that today's "popular" +object-oriented languages enforce. 
+ +=head1 HISTORY + +Last edit: Fri May 21 15:47:56 MDT 1999 diff --git a/contrib/perl5/pod/perltrap.pod b/contrib/perl5/pod/perltrap.pod index 852d8e9..261a20f 100644 --- a/contrib/perl5/pod/perltrap.pod +++ b/contrib/perl5/pod/perltrap.pod @@ -22,7 +22,7 @@ The English module, loaded via use English; allows you to refer to special variables (like C<$/>) with names (like -C<$RS>), as though they were in B<awk>; see L<perlvar> for details. +$RS), as though they were in B<awk>; see L<perlvar> for details. =item * @@ -69,7 +69,7 @@ executed.) See L<perlvar>. =item * -$E<lt>I<digit>E<gt> does not refer to fields--it refers to substrings matched +$<I<digit>> does not refer to fields--it refers to substrings matched by the last match pattern. =item * @@ -103,7 +103,7 @@ basically incompatible with C.) The concatenation operator is ".", not the null string. (Using the null string would render C</pat/ /pat/> unparsable, because the third slash would be interpreted as a division operator--the tokenizer is in fact -slightly context sensitive for operators like "/", "?", and "E<gt>". +slightly context sensitive for operators like "/", "?", and ">". And in fact, "." itself can be the beginning of a number.) =item * @@ -160,7 +160,7 @@ You must use C<elsif> rather than C<else if>. The C<break> and C<continue> keywords from C become in Perl C<last> and C<next>, respectively. -Unlike in C, these do I<NOT> work within a C<do { } while> construct. +Unlike in C, these do I<not> work within a C<do { } while> construct. =item * @@ -295,7 +295,7 @@ you might expect to do not. =item * -The E<lt>FHE<gt> construct is not the name of the filehandle, it is a readline +The <FH> construct is not the name of the filehandle, it is a readline operation on that handle. 
The data read is assigned to $_ only if the file read is the sole condition in a while loop: @@ -305,7 +305,7 @@ file read is the sole condition in a while loop: =item * -Remember not to use "C<=>" when you need "C<=~>"; +Remember not to use C<=> when you need C<=~>; these two constructs are quite different: $x = /foo/; @@ -393,7 +393,8 @@ Everything else. If you find an example of a conversion trap that is not listed here, please submit it to Bill Middleton <F<wjm@best.com>> for inclusion. -Also note that at least some of these can be caught with B<-w>. +Also note that at least some of these can be caught with the +C<use warnings> pragma or the B<-w> switch. =head2 Discontinuance, Deprecation, and BugFix traps @@ -424,7 +425,7 @@ behave differently in perl4 vs. perl5, because the packages don't exist. $a=1;$b=2;$c=3;$var=4; print "$a::$b::$c "; print "$var::abc::xyz\n"; - + # perl4 prints: 1::2::3 4::abc::xyz # perl5 prints: 3 @@ -585,24 +586,6 @@ number of elements in the resulting list. # perl4 prints: second new # perl5 prints: 3 -=item * Discontinuance - -In Perl 4 (and versions of Perl 5 before 5.004), C<'\r'> characters in -Perl code were silently allowed, although they could cause (mysterious!) -failures in certain constructs, particularly here documents. Now, -C<'\r'> characters cause an immediate fatal error. (Note: In this -example, the notation B<\015> represents the incorrect line -ending. Depending upon your text viewer, it will look different.) - - print "foo";\015 - print "bar"; - - # perl4 prints: foobar - # perl5.003 prints: foobar - # perl5.004 dies: Illegal character \015 (carriage return) - -See L<perldiag> for full details. - =item * Deprecation Some error messages will be different. @@ -715,6 +698,30 @@ Logical tests now return an null, instead of 0 Also see L<"General Regular Expression Traps using s///, etc."> for another example of this new feature... 
+=item * Bitwise string ops + +When bitwise operators which can operate upon either numbers or +strings (C<& | ^ ~>) are given only strings as arguments, perl4 would +treat the operands as bitstrings so long as the program contained a call +to the C<vec()> function. perl5 treats the string operands as bitstrings. +(See L<perlop/Bitwise String Operators> for more details.) + + $fred = "10"; + $barney = "12"; + $betty = $fred & $barney; + print "$betty\n"; + # Uncomment the next line to change perl4's behavior + # ($dummy) = vec("dummy", 0, 0); + + # Perl4 prints: + 8 + + # Perl5 prints: + 10 + + # If vec() is used anywhere in the program, both print: + 10 + =back =head2 General data type traps @@ -761,6 +768,9 @@ Hashes get defined before use # perl4 prints: # perl5 dies: hash %h defined +Perl will now generate a warning when it sees defined(@a) and +defined(%h). + =item * (Globs) glob assignment from variable to variable will fail if the assigned @@ -1056,7 +1066,7 @@ All types of RE traps. =item * Regular Expression C<s'$lhs'$rhs'> now does no interpolation on either side. It used to -interpolate C<$lhs> but not C<$rhs>. (And still does not match a literal +interpolate $lhs but not $rhs. (And still does not match a literal '$' in string) $a=1;$b=2; @@ -1095,7 +1105,7 @@ the very first time in any such closure. For instance, if you say } build_match() will always return a sub which matches the contents of -C<$left> and C<$right> as they were the I<first> time that build_match() +$left and $right as they were the I<first> time that build_match() was called, not as they are in the current call. This is probably a bug, and may change in future versions of Perl. @@ -1257,7 +1267,7 @@ Since version 5.002, Perl uses sigaction() under SysV. =item * (SysV) -Under SysV OSes, C<seek()> on a file opened to append C<E<gt>E<gt>> now does +Under SysV OSes, C<seek()> on a file opened to append C<<< >> >>> now does the right thing w.r.t. the fopen() manpage. 
e.g., - When a file is opened for append, it is impossible to overwrite information already in the file. @@ -1327,7 +1337,7 @@ Note that you can C<use strict;> to ward off such trappiness under perl5. =item * Interpolation The construct "this is $$x" used to interpolate the pid at that -point, but now apparently tries to dereference C<$x>. C<$$> by itself still +point, but now apparently tries to dereference $x. C<$$> by itself still works fine, however. print "this is $$x\n"; diff --git a/contrib/perl5/pod/perlunicode.pod b/contrib/perl5/pod/perlunicode.pod new file mode 100644 index 0000000..5333ac4 --- /dev/null +++ b/contrib/perl5/pod/perlunicode.pod @@ -0,0 +1,244 @@ +=head1 NAME + +perlunicode - Unicode support in Perl + +=head1 DESCRIPTION + +=head2 Important Caveat + +WARNING: The implementation of Unicode support in Perl is incomplete. + +The following areas need further work. + +=over + +=item Input and Output Disciplines + +There is currently no easy way to mark data read from a file or other +external source as being utf8. This will be one of the major areas of +focus in the near future. + +=item Regular Expressions + +The existing regular expression compiler does not produce polymorphic +opcodes. This means that the determination on whether to match Unicode +characters is made when the pattern is compiled, based on whether the +pattern contains Unicode characters, and not when the matching happens +at run time. This needs to be changed to adaptively match Unicode if +the string to be matched is Unicode. + +=item C<use utf8> still needed to enable a few features + +The C<utf8> pragma implements the tables used for Unicode support. These +tables are automatically loaded on demand, so the C<utf8> pragma need not +normally be used. + +However, as a compatibility measure, this pragma must be explicitly used +to enable recognition of UTF-8 encoded literals and identifiers in the +source text. 
+ +=back + +=head2 Byte and Character semantics + +Beginning with version 5.6, Perl uses logically wide characters to +represent strings internally. This internal representation of strings +uses the UTF-8 encoding. + +In future, Perl-level operations can be expected to work with characters +rather than bytes, in general. + +However, as strictly an interim compatibility measure, Perl v5.6 aims to +provide a safe migration path from byte semantics to character semantics +for programs. For operations where Perl can unambiguously decide that the +input data is characters, Perl now switches to character semantics. +For operations where this determination cannot be made without additional +information from the user, Perl decides in favor of compatibility, and +chooses to use byte semantics. + +This behavior preserves compatibility with earlier versions of Perl, +which allowed byte semantics in Perl operations, but only as long as +none of the program's inputs are marked as being a source of Unicode +character data. Such data may come from filehandles, from calls to +external programs, from information provided by the system (such as %ENV), +or from literals and constants in the source text. + +If the C<-C> command line switch is used, (or the ${^WIDE_SYSTEM_CALLS} +global flag is set to C<1>), all system calls will use the +corresponding wide character APIs. This is currently only implemented +on Windows. + +Regardless of the above, the C<bytes> pragma can always be used to force +byte semantics in a particular lexical scope. See L<bytes>. + +The C<utf8> pragma is primarily a compatibility device that enables +recognition of UTF-8 in literals encountered by the parser. It may also +be used for enabling some of the more experimental Unicode support features. +Note that this pragma is only required until a future version of Perl +in which character semantics will become the default. This pragma may +then become a no-op. See L<utf8>. 
+ +Unless mentioned otherwise, Perl operators will use character semantics +when they are dealing with Unicode data, and byte semantics otherwise. +Thus, character semantics for these operations apply transparently; if +the input data came from a Unicode source (for example, by adding a +character encoding discipline to the filehandle whence it came, or a +literal UTF-8 string constant in the program), character semantics +apply; otherwise, byte semantics are in effect. To force byte semantics +on Unicode data, the C<bytes> pragma should be used. + +Under character semantics, many operations that formerly operated on +bytes change to operating on characters. For ASCII data this makes +no difference, because UTF-8 stores ASCII in single bytes, but for +any character greater than C<chr(127)>, the character may be stored in +a sequence of two or more bytes, all of which have the high bit set. +But by and large, the user need not worry about this, because Perl +hides it from the user. A character in Perl is logically just a number +ranging from 0 to 2**32 or so. Larger characters encode to longer +sequences of bytes internally, but again, this is just an internal +detail which is hidden at the Perl level. + +=head2 Effects of character semantics + +Character semantics have the following effects: + +=over 4 + +=item * + +Strings and patterns may contain characters that have an ordinal value +larger than 255. + +Presuming you use a Unicode editor to edit your program, such characters +will typically occur directly within the literal strings as UTF-8 +characters, but you can also specify a particular character with an +extension of the C<\x> notation. UTF-8 characters are specified by +putting the hexadecimal code within curlies after the C<\x>. For instance, +a Unicode smiley face is C<\x{263A}>. 
A character in the Latin-1 range +(128..255) should be written C<\x{ab}> rather than C<\xab>, since the +former will turn into a two-byte UTF-8 code, while the latter will +continue to be interpreted as generating an 8-bit byte rather than a +character. In fact, if the C<use warnings> pragma or the C<-w> switch +is turned on, it will produce a warning +that you might be generating invalid UTF-8. + +=item * + +Identifiers within the Perl script may contain Unicode alphanumeric +characters, including ideographs. (You are currently on your own when +it comes to using the canonical forms of characters--Perl doesn't (yet) +attempt to canonicalize variable names for you.) + +=item * + +Regular expressions match characters instead of bytes. For instance, +"." matches a character instead of a byte. (However, the C<\C> pattern +is provided to force a match of a single byte ("C<char>" in C, hence +C<\C>).) + +=item * + +Character classes in regular expressions match characters instead of +bytes, and match against the character properties specified in the +Unicode properties database. So C<\w> can be used to match an ideograph, +for instance. + +=item * + +Named Unicode properties and block ranges may be used as character +classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't +match property) constructs. For instance, C<\p{Lu}> matches any +character with the Unicode uppercase property, while C<\p{M}> matches +any mark character. Single letter properties may omit the brackets, so +that can be written C<\pM> also. Many predefined character classes are +available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. + +=item * + +The special pattern C<\X> matches any extended Unicode sequence +(a "combining character sequence" in Standardese), where the first +character is a base character and subsequent characters are mark +characters that apply to the base character. It is equivalent to +C<(?:\PM\pM*)>. 
+ +=item * + +The C<tr///> operator translates characters instead of bytes. It can also +be forced to translate between 8-bit codes and UTF-8. For instance, if you +know your input is in Latin-1, you can say: + + while (<>) { + tr/\0-\xff//CU; # latin1 char to utf8 + ... + } + +Similarly you could translate your output with + + tr/\0-\x{ff}//UC; # utf8 to latin1 char + +No, C<s///> doesn't take /U or /C (yet?). + +=item * + +Case translation operators use the Unicode case translation tables +when provided character input. Note that C<uc()> translates to +uppercase, while C<ucfirst> translates to titlecase (for languages +that make the distinction). Naturally the corresponding backslash +sequences have the same semantics. + +=item * + +Most operators that deal with positions or lengths in the string will +automatically switch to using character positions, including C<chop()>, +C<substr()>, C<pos()>, C<index()>, C<rindex()>, C<sprintf()>, +C<write()>, and C<length()>. Operators that specifically don't switch +include C<vec()>, C<pack()>, and C<unpack()>. Operators that really +don't care include C<chomp()>, as well as any other operator that +treats a string as a bucket of bits, such as C<sort()>, and the +operators dealing with filenames. + +=item * + +The C<pack()>/C<unpack()> letters "C<c>" and "C<C>" do I<not> change, +since they're often used for byte-oriented formats. (Again, think +"C<char>" in the C language.) However, there is a new "C<U>" specifier +that will convert between UTF-8 characters and integers. (It works +outside of the utf8 pragma too.) + +=item * + +The C<chr()> and C<ord()> functions work on characters. This is like +C<pack("U")> and C<unpack("U")>, not like C<pack("C")> and +C<unpack("C")>. In fact, the latter are how you now emulate +byte-oriented C<chr()> and C<ord()> under utf8. + +=item * + +And finally, C<scalar reverse()> reverses by character rather than by byte. 
+ +=back + +=head2 Character encodings for input and output + +[XXX: This feature is not yet implemented.] + +=head1 CAVEATS + +As of yet, there is no method for automatically coercing input and +output to some encoding other than UTF-8. This is planned in the near +future, however. + +Whether an arbitrary piece of data will be treated as "characters" or +"bytes" by internal operations cannot be divined at the current time. + +Use of locales with utf8 may lead to odd results. Currently there is +some attempt to apply 8-bit locale info to characters in the range +0..255, but this is demonstrably incorrect for locales that use +characters above that range (when mapped into Unicode). It will also +tend to run slower. Avoidance of locales is strongly encouraged. + +=head1 SEE ALSO + +L<bytes>, L<utf8>, L<perlvar/"${^WIDE_SYSTEM_CALLS}"> + +=cut diff --git a/contrib/perl5/pod/perlvar.pod b/contrib/perl5/pod/perlvar.pod index 8d0ded6..04fb3fe 100644 --- a/contrib/perl5/pod/perlvar.pod +++ b/contrib/perl5/pod/perlvar.pod @@ -7,9 +7,9 @@ perlvar - Perl predefined variables =head2 Predefined Names The following names have special meaning to Perl. Most -punctuation names have reasonable mnemonics, or analogues in one of -the shells. Nevertheless, if you wish to use long variable names, -you just need to say +punctuation names have reasonable mnemonics, or analogs in the +shells. Nevertheless, if you wish to use long variable names, +you need only say use English; @@ -17,21 +17,12 @@ at the top of your program. This will alias all the short names to the long names in the current package. Some even have medium names, generally borrowed from B<awk>. -Due to an unfortunate accident of Perl's implementation, "C<use English>" -imposes a considerable performance penalty on all regular expression -matches in a program, regardless of whether they occur in the scope of -"C<use English>". For that reason, saying "C<use English>" in -libraries is strongly discouraged. 
See the Devel::SawAmpersand module -documentation from CPAN -(http://www.perl.com/CPAN/modules/by-module/Devel/Devel-SawAmpersand-0.10.readme) -for more information. - -To go a step further, those variables that depend on the currently -selected filehandle may instead (and preferably) be set by calling an -object method on the FileHandle object. (Summary lines below for this -contain the word HANDLE.) First you must say +If you don't mind the performance hit, variables that depend on the +currently selected filehandle may instead be set by calling an +appropriate object method on the IO::Handle object. (Summary lines +below for this contain the word HANDLE.) First you must say - use FileHandle; + use IO::Handle; after which you may use either @@ -41,21 +32,20 @@ or more safely, HANDLE->method(EXPR) -Each of the methods returns the old value of the FileHandle attribute. +Each method returns the old value of the IO::Handle attribute. The methods each take an optional EXPR, which if supplied specifies the -new value for the FileHandle attribute in question. If not supplied, -most of the methods do nothing to the current value, except for +new value for the IO::Handle attribute in question. If not supplied, +most methods do nothing to the current value--except for autoflush(), which will assume a 1 for you, just to be different. +Because loading in the IO::Handle class is an expensive operation, you should +learn how to use the regular built-in variables. A few of these variables are considered "read-only". This means that if you try to assign to this variable, either directly or indirectly through a reference, you'll raise a run-time exception. The following list is ordered by scalar variables first, then the -arrays, then the hashes (except $^M was added in the wrong place). -This is somewhat obscured by the fact that %ENV and %SIG are listed as -$ENV{expr} and $SIG{expr}. - +arrays, then the hashes. =over 8 @@ -66,7 +56,7 @@ $ENV{expr} and $SIG{expr}. 
The default input and pattern-searching space. The following pairs are equivalent: - while (<>) {...} # equivalent in only while! + while (<>) {...} # equivalent only in while! while (defined($_ = <>)) {...} /^Subject:/ @@ -75,8 +65,8 @@ equivalent: tr/a-z/A-Z/ $_ =~ tr/a-z/A-Z/ - chop - chop($_) + chomp + chomp($_) Here are the places where Perl will assume $_ even if you don't use it: @@ -109,9 +99,9 @@ The implicit iterator variable in the grep() and map() functions. =item * -The default place to put an input record when a C<E<lt>FHE<gt>> +The default place to put an input record when a C<< <FH> >> operation's result is tested by itself as the sole criterion of a C<while> -test. Note that outside of a C<while> test, this will not happen. +test. Outside a C<while> test, this will not happen. =back @@ -121,12 +111,13 @@ test. Note that outside of a C<while> test, this will not happen. =over 8 -=item $E<lt>I<digits>E<gt> +=item $<I<digits>> -Contains the subpattern from the corresponding set of parentheses in -the last pattern matched, not counting patterns matched in nested -blocks that have been exited already. (Mnemonic: like \digits.) -These variables are all read-only. +Contains the subpattern from the corresponding set of capturing +parentheses from the last pattern match, not counting patterns +matched in nested blocks that have been exited already. (Mnemonic: +like \digits.) These variables are all read-only and dynamically +scoped to the current BLOCK. =item $MATCH @@ -134,11 +125,11 @@ These variables are all read-only. The string matched by the last successful pattern match (not counting any matches hidden within a BLOCK or eval() enclosed by the current -BLOCK). (Mnemonic: like & in some editors.) This variable is read-only. +BLOCK). (Mnemonic: like & in some editors.) This variable is read-only +and dynamically scoped to the current BLOCK. 
The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L<BUGS>. =item $PREMATCH @@ -150,8 +141,7 @@ enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted string.) This variable is read-only. The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L<BUGS>. =item $POSTMATCH @@ -166,24 +156,36 @@ string.) Example: /def/; print "$`:$&:$'\n"; # prints abc:def:ghi -This variable is read-only. +This variable is read-only and dynamically scoped to the current BLOCK. The use of this variable anywhere in a program imposes a considerable -performance penalty on all regular expression matches. See the -Devel::SawAmpersand module from CPAN for more information. +performance penalty on all regular expression matches. See L<BUGS>. =item $LAST_PAREN_MATCH =item $+ The last bracket matched by the last search pattern. This is useful if -you don't know which of a set of alternative patterns matched. For +you don't know which one of a set of alternative patterns matched. For example: /Version: (.*)|Revision: (.*)/ && ($rev = $+); (Mnemonic: be positive and forward looking.) -This variable is read-only. +This variable is read-only and dynamically scoped to the current BLOCK. + +=item @+ + +This array holds the offsets of the ends of the last successful +submatches in the currently active dynamic scope. C<$+[0]> is +the offset into the string of the end of the entire match. This +is the same value as what the C<pos> function returns when called +on the variable that was matched against. 
The I<n>th element +of this array holds the offset of the I<n>th submatch, so +C<$+[1]> is the offset past where $1 ends, C<$+[2]> the offset +past where $2 ends, and so on. You can use C<$#+> to determine +how many subgroups were in the last successful match. See the +examples given for the C<@-> variable. =item $MULTILINE_MATCHING @@ -192,12 +194,12 @@ This variable is read-only. Set to 1 to do multi-line matching within a string, 0 to tell Perl that it can assume that strings contain a single line, for the purpose of optimizing pattern matches. Pattern matches on strings containing -multiple newlines can produce confusing results when "C<$*>" is 0. Default -is 0. (Mnemonic: * matches multiple things.) Note that this variable -influences the interpretation of only "C<^>" and "C<$>". A literal newline can +multiple newlines can produce confusing results when C<$*> is 0. Default +is 0. (Mnemonic: * matches multiple things.) This variable +influences the interpretation of only C<^> and C<$>. A literal newline can be searched for even when C<$* == 0>. -Use of "C<$*>" is deprecated in modern Perls, supplanted by +Use of C<$*> is deprecated in modern Perl, supplanted by the C</s> and C</m> modifiers on pattern matching. =item input_line_number HANDLE EXPR @@ -208,15 +210,16 @@ the C</s> and C</m> modifiers on pattern matching. =item $. -The current input line number for the last file handle from -which you read (or performed a C<seek> or C<tell> on). The value +The current input record number for the last file handle from which +you just read() (or called a C<seek> or C<tell> on). The value may be different from the actual physical line number in the file, -depending on what notion of "line" is in effect--see L<$/> on how -to affect that. An -explicit close on a filehandle resets the line number. Because -"C<E<lt>E<gt>>" never does an explicit close, line numbers increase -across ARGV files (but see examples under eof()). 
Localizing C<$.> has -the effect of also localizing Perl's notion of "the last read +depending on what notion of "line" is in effect--see C<$/> on how +to change that. An explicit close on a filehandle resets the line +number. Because C<< <> >> never does an explicit close, line +numbers increase across ARGV files (but see examples in L<perlfunc/eof>). +Consider this variable read-only: setting it does not reposition +the seek pointer; you'll have to do that on your own. Localizing C<$.> +has the effect of also localizing Perl's notion of "the last read filehandle". (Mnemonic: many programs use "." to mean the current line number.) @@ -228,48 +231,50 @@ number.) =item $/ -The input record separator, newline by default. This is used to -influence Perl's idea of what a "line" is. Works like B<awk>'s RS -variable, including treating empty lines as delimiters if set to the -null string. (Note: An empty line cannot contain any spaces or tabs.) -You may set it to a multi-character string to match a multi-character -delimiter, or to C<undef> to read to end of file. Note that setting it -to C<"\n\n"> means something slightly different than setting it to -C<"">, if the file contains consecutive empty lines. Setting it to -C<""> will treat two or more consecutive empty lines as a single empty -line. Setting it to C<"\n\n"> will blindly assume that the next input -character belongs to the next paragraph, even if it's a newline. -(Mnemonic: / is used to delimit line boundaries when quoting poetry.) +The input record separator, newline by default. This +influences Perl's idea of what a "line" is. Works like B<awk>'s RS +variable, including treating empty lines as a terminator if set to +the null string. (An empty line cannot contain any spaces +or tabs.) You may set it to a multi-character string to match a +multi-character terminator, or to C<undef> to read through the end +of file. 
Setting it to C<"\n\n"> means something slightly +different than setting to C<"">, if the file contains consecutive +empty lines. Setting to C<""> will treat two or more consecutive +empty lines as a single empty line. Setting to C<"\n\n"> will +blindly assume that the next input character belongs to the next +paragraph, even if it's a newline. (Mnemonic: / delimits +line boundaries when quoting poetry.) undef $/; # enable "slurp" mode $_ = <FH>; # whole file now here s/\n[ \t]+/ /g; -Remember: the value of $/ is a string, not a regexp. AWK has to be -better for something :-) +Remember: the value of C<$/> is a string, not a regex. B<awk> has to be +better for something. :-) -Setting $/ to a reference to an integer, scalar containing an integer, or -scalar that's convertable to an integer will attempt to read records +Setting C<$/> to a reference to an integer, scalar containing an integer, or +scalar that's convertible to an integer will attempt to read records instead of lines, with the maximum record size being the referenced -integer. So this: +integer. So this: $/ = \32768; # or \"32768", or \$var_containing_32768 open(FILE, $myfile); $_ = <FILE>; -will read a record of no more than 32768 bytes from FILE. If you're not -reading from a record-oriented file (or your OS doesn't have -record-oriented files), then you'll likely get a full chunk of data with -every read. If a record is larger than the record size you've set, you'll -get the record back in pieces. +will read a record of no more than 32768 bytes from FILE. If you're +not reading from a record-oriented file (or your OS doesn't have +record-oriented files), then you'll likely get a full chunk of data +with every read. If a record is larger than the record size you've +set, you'll get the record back in pieces. -On VMS, record reads are done with the equivalent of C<sysread>, so it's -best not to mix record and non-record reads on the same file. 
(This is -likely not a problem, as any file you'd want to read in record mode is -probably usable in line mode) Non-VMS systems perform normal I/O, so -it's safe to mix record and non-record reads of a file. +On VMS, record reads are done with the equivalent of C<sysread>, +so it's best not to mix record and non-record reads on the same +file. (This is unlikely to be a problem, because any file you'd +want to read in record mode is probably unusable in line mode.) +Non-VMS systems do normal I/O, so it's safe to mix record and +non-record reads of a file. -Also see L<$.>. +See also L<perlport/"Newlines">. Also see C<$.>. =item autoflush HANDLE EXPR @@ -277,16 +282,17 @@ Also see L<$.>. =item $| -If set to nonzero, forces a flush right away and after every write or print on the -currently selected output channel. Default is 0 (regardless of whether -the channel is actually buffered by the system or not; C<$|> tells you -only whether you've asked Perl explicitly to flush after each write). -Note that STDOUT will typically be line buffered if output is to the -terminal and block buffered otherwise. Setting this variable is useful -primarily when you are outputting to a pipe, such as when you are running -a Perl script under rsh and want to see the output as it's happening. This -has no effect on input buffering. -(Mnemonic: when you want your pipes to be piping hot.) +If set to nonzero, forces a flush right away and after every write +or print on the currently selected output channel. Default is 0 +(regardless of whether the channel is really buffered by the +system or not; C<$|> tells you only whether you've asked Perl +explicitly to flush after each write). STDOUT will +typically be line buffered if output is to the terminal and block +buffered otherwise. Setting this variable is useful primarily when +you are outputting to a pipe or socket, such as when you are running +a Perl program under B<rsh> and want to see the output as it's +happening. 
This has no effect on input buffering. See L<perlfunc/getc> +for that. (Mnemonic: when you want your pipes to be piping hot.) =item output_field_separator HANDLE EXPR @@ -297,11 +303,11 @@ has no effect on input buffering. =item $, The output field separator for the print operator. Ordinarily the -print operator simply prints out the comma-separated fields you -specify. To get behavior more like B<awk>, set this variable -as you would set B<awk>'s OFS variable to specify what is printed -between fields. (Mnemonic: what is printed when there is a , in your -print statement.) +print operator simply prints out its arguments without further +adornment. To get behavior more like B<awk>, set this variable as +you would set B<awk>'s OFS variable to specify what is printed +between fields. (Mnemonic: what is printed when there is a "," in +your print statement.) =item output_record_separator HANDLE EXPR @@ -312,21 +318,21 @@ print statement.) =item $\ The output record separator for the print operator. Ordinarily the -print operator simply prints out the comma-separated fields you -specify, with no trailing newline or record separator assumed. -To get behavior more like B<awk>, set this variable as you would -set B<awk>'s ORS variable to specify what is printed at the end of the -print. (Mnemonic: you set "C<$\>" instead of adding \n at the end of the -print. Also, it's just like C<$/>, but it's what you get "back" from -Perl.) +print operator simply prints out its arguments as is, with no +trailing newline or other end-of-record string added. To get +behavior more like B<awk>, set this variable as you would set +B<awk>'s ORS variable to specify what is printed at the end of the +print. (Mnemonic: you set C<$\> instead of adding "\n" at the +end of the print. Also, it's just like C<$/>, but it's what you +get "back" from Perl.) 
=item $LIST_SEPARATOR =item $" -This is like "C<$,>" except that it applies to array values interpolated -into a double-quoted string (or similar interpreted string). Default -is a space. (Mnemonic: obvious, I think.) +This is like C<$,> except that it applies to array and slice values +interpolated into a double-quoted string (or similar interpreted +string). Default is a space. (Mnemonic: obvious, I think.) =item $SUBSCRIPT_SEPARATOR @@ -351,13 +357,14 @@ which means ($foo{$a},$foo{$b},$foo{$c}) -Default is "\034", the same as SUBSEP in B<awk>. Note that if your -keys contain binary data there might not be any safe value for "C<$;>". +Default is "\034", the same as SUBSEP in B<awk>. If your +keys contain binary data there might not be any safe value for C<$;>. (Mnemonic: comma (the syntactic subscript separator) is a -semi-semicolon. Yeah, I know, it's pretty lame, but "C<$,>" is already +semi-semicolon. Yeah, I know, it's pretty lame, but C<$,> is already taken for something more important.) -Consider using "real" multidimensional arrays. +Consider using "real" multidimensional arrays as described +in L<perllol>. =item $OFMT @@ -365,13 +372,13 @@ Consider using "real" multidimensional arrays. The output format for printed numbers. This variable is a half-hearted attempt to emulate B<awk>'s OFMT variable. There are times, however, -when B<awk> and Perl have differing notions of what is in fact -numeric. The initial value is %.I<n>g, where I<n> is the value +when B<awk> and Perl have differing notions of what counts as +numeric. The initial value is "%.I<n>g", where I<n> is the value of the macro DBL_DIG from your system's F<float.h>. This is different from -B<awk>'s default OFMT setting of %.6g, so you need to set "C<$#>" +B<awk>'s default OFMT setting of "%.6g", so you need to set C<$#> explicitly to get B<awk>'s value. (Mnemonic: # is the number sign.) -Use of "C<$#>" is deprecated. +Use of C<$#> is deprecated. 
=item format_page_number HANDLE EXPR @@ -380,6 +387,7 @@ Use of "C<$#>" is deprecated. =item $% The current page number of the currently selected output channel. +Used with formats. (Mnemonic: % is page number in B<nroff>.) =item format_lines_per_page HANDLE EXPR @@ -389,7 +397,9 @@ The current page number of the currently selected output channel. =item $= The current page length (printable lines) of the currently selected -output channel. Default is 60. (Mnemonic: = has horizontal lines.) +output channel. Default is 60. +Used with formats. +(Mnemonic: = has horizontal lines.) =item format_lines_left HANDLE EXPR @@ -398,7 +408,50 @@ output channel. Default is 60. (Mnemonic: = has horizontal lines.) =item $- The number of lines left on the page of the currently selected output -channel. (Mnemonic: lines_on_page - lines_printed.) +channel. +Used with formats. +(Mnemonic: lines_on_page - lines_printed.) + +=item @- + +$-[0] is the offset of the start of the last successful match. +C<$-[>I<n>C<]> is the offset of the start of the substring matched by +I<n>-th subpattern, or undef if the subpattern did not match. + +Thus after a match against $_, $& coincides with C<substr $_, $-[0], +$+[0] - $-[0]>. Similarly, C<$>I<n> coincides with C<substr $_, $-[>I<n>C<], +$+[>I<n>C<] - $-[>I<n>C<]> if C<$-[>I<n>C<]> is defined, and $+ coincides with +C<substr $_, $-[$#-], $+[$#-] - $-[$#-]>. One can use C<$#-> to find the last +matched subgroup in the last successful match. Contrast with +C<$#+>, the number of subgroups in the regular expression. Compare +with C<@+>. + +This array holds the offsets of the beginnings of the last +successful submatches in the currently active dynamic scope. +C<$-[0]> is the offset into the string of the beginning of the +entire match. The I<n>th element of this array holds the offset +of the I<n>th submatch, so C<$-[1]> is the offset where $1 +begins, C<$-[2]> the offset where $2 begins, and so on. 
+You can use C<$#-> to determine how many subgroups were in the +last successful match. Compare with the C<@+> variable. + +After a match against some variable $var: + +=over 5 + +=item C<$`> is the same as C<substr($var, 0, $-[0])> + +=item C<$&> is the same as C<substr($var, $-[0], $+[0] - $-[0])> + +=item C<$'> is the same as C<substr($var, $+[0])> + +=item C<$1> is the same as C<substr($var, $-[1], $+[1] - $-[1])> + +=item C<$2> is the same as C<substr($var, $-[2], $+[2] - $-[2])> + +=item C<$3> is the same as C<substr($var, $-[3], $+[3] - $-[3])> + +=back =item format_name HANDLE EXPR @@ -407,8 +460,8 @@ channel. (Mnemonic: lines_on_page - lines_printed.) =item $~ The name of the current report format for the currently selected output -channel. Default is name of the filehandle. (Mnemonic: brother to -"C<$^>".) +channel. Default is the name of the filehandle. (Mnemonic: brother to +C<$^>.) =item format_top_name HANDLE EXPR @@ -417,7 +470,7 @@ channel. Default is name of the filehandle. (Mnemonic: brother to =item $^ The name of the current top-of-page format for the currently selected -output channel. Default is name of the filehandle with _TOP +output channel. Default is the name of the filehandle with _TOP appended. (Mnemonic: points to top of page.) =item format_line_break_characters HANDLE EXPR @@ -437,16 +490,16 @@ poetry is a part of a line.) =item $^L -What formats output to perform a form feed. Default is \f. +What formats output as a form feed. Default is \f. =item $ACCUMULATOR =item $^A The current value of the write() accumulator for format() lines. A format -contains formline() commands that put their result into C<$^A>. After +contains formline() calls that put their result into C<$^A>. After calling its format, write() prints out the contents of C<$^A> and empties. -So you never actually see the contents of C<$^A> unless you call +So you never really see the contents of C<$^A> unless you call formline() yourself and then look at it. 
See L<perlform> and L<perlfunc/formline()>. @@ -455,21 +508,27 @@ L<perlfunc/formline()>. =item $? The status returned by the last pipe close, backtick (C<``>) command, -or system() operator. Note that this is the status word returned by the -wait() system call (or else is made up to look like it). Thus, the exit -value of the subprocess is actually (C<$? E<gt>E<gt> 8>), and C<$? & 127> -gives which signal, if any, the process died from, and C<$? & 128> reports -whether there was a core dump. (Mnemonic: similar to B<sh> and B<ksh>.) +successful call to wait() or waitpid(), or from the system() +operator. This is just the 16-bit status word returned by the +wait() system call (or else is made up to look like it). Thus, the +exit value of the subprocess is really (C<<< $? >> 8 >>>), and +C<$? & 127> gives which signal, if any, the process died from, and +C<$? & 128> reports whether there was a core dump. (Mnemonic: +similar to B<sh> and B<ksh>.) Additionally, if the C<h_errno> variable is supported in C, its value -is returned via $? if any of the C<gethost*()> functions fail. +is returned via $? if any C<gethost*()> function fails. -Note that if you have installed a signal handler for C<SIGCHLD>, the +If you have installed a signal handler for C<SIGCHLD>, the value of C<$?> will usually be wrong outside that handler. Inside an C<END> subroutine C<$?> contains the value that is going to be given to C<exit()>. You can modify C<$?> in an C<END> subroutine to -change the exit status of the script. +change the exit status of your program. For example: + + END { + $? = 1 if $? == 255; # die would make it 255 + } Under VMS, the pragma C<use vmsish 'status'> makes C<$?> reflect the actual VMS exit status, instead of the default emulation of POSIX @@ -483,14 +542,15 @@ Also see L<Error Indicators>. =item $! -If used in a numeric context, yields the current value of errno, with -all the usual caveats. 
(This means that you shouldn't depend on the -value of C<$!> to be anything in particular unless you've gotten a -specific error return indicating a system error.) If used in a string -context, yields the corresponding system error string. You can assign -to C<$!> to set I<errno> if, for instance, you want C<"$!"> to return the -string for error I<n>, or you want to set the exit value for the die() -operator. (Mnemonic: What just went bang?) +If used numerically, yields the current value of the C C<errno> +variable, with all the usual caveats. (This means that you shouldn't +depend on the value of C<$!> to be anything in particular unless +you've gotten a specific error return indicating a system error.) +If used as a string, yields the corresponding system error string. +You can assign a number to C<$!> to set I<errno> if, for instance, +you want C<"$!"> to return the string for error I<n>, or you want +to set the exit value for the die() operator. (Mnemonic: What just +went bang?) Also see L<Error Indicators>. @@ -514,7 +574,7 @@ OS/2 API either via CRT, or directly from perl. Under Win32, C<$^E> always returns the last error information reported by the Win32 call C<GetLastError()> which describes the last error from within the Win32 API. Most Win32-specific -code will report errors via C<$^E>. ANSI C and UNIX-like calls +code will report errors via C<$^E>. ANSI C and Unix-like calls set C<errno> and so most portable Perl code will report errors via C<$!>. @@ -527,12 +587,12 @@ Also see L<Error Indicators>. =item $@ -The Perl syntax error message from the last eval() command. If null, the +The Perl syntax error message from the last eval() operator. If null, the last eval() parsed and executed correctly (although the operations you invoked may have failed in the normal fashion). (Mnemonic: Where was the syntax error "at"?) -Note that warning messages are not collected in this variable. You can, +Warning messages are not collected in this variable.
You can, however, set up a routine to process warnings by setting C<$SIG{__WARN__}> as described below. @@ -544,8 +604,9 @@ Also see L<Error Indicators>. =item $$ -The process number of the Perl running this script. (Mnemonic: same -as shells.) +The process number of the Perl running this script. You should +consider this variable read-only, although it will be altered +across fork() calls. (Mnemonic: same as shells.) =item $REAL_USER_ID @@ -553,7 +614,7 @@ as shells.) =item $< -The real uid of this process. (Mnemonic: it's the uid you came I<FROM>, +The real uid of this process. (Mnemonic: it's the uid you came I<from>, if you're running setuid.) =item $EFFECTIVE_USER_ID @@ -567,8 +628,8 @@ The effective uid of this process. Example: $< = $>; # set real to effective uid ($<,$>) = ($>,$<); # swap real and effective uid -(Mnemonic: it's the uid you went I<TO>, if you're running setuid.) -Note: "C<$E<lt>>" and "C<$E<gt>>" can be swapped only on machines +(Mnemonic: it's the uid you went I<to>, if you're running setuid.) +C<< $< >> and C<< $> >> can be swapped only on machines supporting setreuid(). =item $REAL_GROUP_ID @@ -583,12 +644,12 @@ list of groups you are in. The first number is the one returned by getgid(), and the subsequent ones by getgroups(), one of which may be the same as the first number. -However, a value assigned to "C<$(>" must be a single number used to -set the real gid. So the value given by "C<$(>" should I<not> be assigned -back to "C<$(>" without being forced numeric, such as by adding zero. +However, a value assigned to C<$(> must be a single number used to +set the real gid. So the value given by C<$(> should I<not> be assigned +back to C<$(> without being forced numeric, such as by adding zero. -(Mnemonic: parentheses are used to I<GROUP> things. The real gid is the -group you I<LEFT>, if you're running setgid.) +(Mnemonic: parentheses are used to I<group> things. The real gid is the +group you I<left>, if you're running setgid.) 
=item $EFFECTIVE_GROUP_ID @@ -602,44 +663,41 @@ separated list of groups you are in. The first number is the one returned by getegid(), and the subsequent ones by getgroups(), one of which may be the same as the first number. -Similarly, a value assigned to "C<$)>" must also be a space-separated -list of numbers. The first number is used to set the effective gid, and +Similarly, a value assigned to C<$)> must also be a space-separated +list of numbers. The first number sets the effective gid, and the rest (if any) are passed to setgroups(). To get the effect of an empty list for setgroups(), just repeat the new effective gid; that is, to force an effective gid of 5 and an effectively empty setgroups() list, say C< $) = "5 5" >. -(Mnemonic: parentheses are used to I<GROUP> things. The effective gid -is the group that's I<RIGHT> for you, if you're running setgid.) +(Mnemonic: parentheses are used to I<group> things. The effective gid +is the group that's I<right> for you, if you're running setgid.) -Note: "C<$E<lt>>", "C<$E<gt>>", "C<$(>" and "C<$)>" can be set only on -machines that support the corresponding I<set[re][ug]id()> routine. "C<$(>" -and "C<$)>" can be swapped only on machines supporting setregid(). +C<< $< >>, C<< $> >>, C<$(> and C<$)> can be set only on +machines that support the corresponding I<set[re][ug]id()> routine. C<$(> +and C<$)> can be swapped only on machines supporting setregid(). =item $PROGRAM_NAME =item $0 -Contains the name of the file containing the Perl script being -executed. On some operating systems -assigning to "C<$0>" modifies the argument area that the ps(1) -program sees. This is more useful as a way of indicating the -current program state than it is for hiding the program you're running. +Contains the name of the program being executed. On some operating +systems assigning to C<$0> modifies the argument area that the B<ps> +program sees. 
This is more useful as a way of indicating the current +program state than it is for hiding the program you're running. (Mnemonic: same as B<sh> and B<ksh>.) =item $[ The index of the first element in an array, and of the first character -in a substring. Default is 0, but you could set it to 1 to make -Perl behave more like B<awk> (or Fortran) when subscripting and when -evaluating the index() and substr() functions. (Mnemonic: [ begins -subscripts.) +in a substring. Default is 0, but you could theoretically set it +to 1 to make Perl behave more like B<awk> (or Fortran) when +subscripting and when evaluating the index() and substr() functions. +(Mnemonic: [ begins subscripts.) -As of Perl 5, assignment to "C<$[>" is treated as a compiler directive, -and cannot influence the behavior of any other file. Its use is -discouraged. - -=item $PERL_VERSION +As of release 5 of Perl, assignment to C<$[> is treated as a compiler +directive, and cannot influence the behavior of any other file. +Its use is highly discouraged. =item $] @@ -651,16 +709,22 @@ of perl in the right bracket?) Example: warn "No checksumming!\n" if $] < 3.019; See also the documentation of C<use VERSION> and C<require VERSION> -for a convenient way to fail if the Perl interpreter is too old. +for a convenient way to fail if the running Perl interpreter is too old. + +The use of this variable is deprecated. The floating point representation +can sometimes lead to inaccurate numeric comparisons. See C<$^V> for a +more modern representation of the Perl version that allows accurate string +comparisons. =item $COMPILING =item $^C -The current value of the flag associated with the B<-c> switch. Mainly -of use with B<-MO=...> to allow code to alter its behaviour when being compiled. -(For example to automatically AUTOLOADing at compile time rather than normal -deferred loading.) Setting C<$^C = 1> is similar to calling C<B::minus_c>. +The current value of the flag associated with the B<-c> switch. 
+Mainly of use with B<-MO=...> to allow code to alter its behavior +when being compiled, for example to AUTOLOAD at compile +time rather than use normal, deferred loading. See L<perlcc>. Setting +C<$^C = 1> is similar to calling C<B::minus_c>. =item $DEBUGGING @@ -677,14 +741,61 @@ The maximum system file descriptor, ordinarily 2. System file descriptors are passed to exec()ed processes, while higher file descriptors are not. Also, during an open(), system file descriptors are preserved even if the open() fails. (Ordinary file descriptors are -closed before the open() is attempted.) Note that the close-on-exec +closed before the open() is attempted.) The close-on-exec status of a file descriptor will be decided according to the value of -C<$^F> when the open() or pipe() was called, not the time of the exec(). +C<$^F> when the corresponding file, pipe, or socket was opened, not the +time of the exec(). =item $^H -The current set of syntax checks enabled by C<use strict> and other block -scoped compiler hints. See the documentation of C<strict> for more details. +WARNING: This variable is strictly for internal use only. Its availability, +behavior, and contents are subject to change without notice. + +This variable contains compile-time hints for the Perl interpreter. At the +end of compilation of a BLOCK the value of this variable is restored to the +value when the interpreter started to compile the BLOCK. + +When perl begins to parse any block construct that provides a lexical scope +(e.g., eval body, required file, subroutine body, loop body, or conditional +block), the existing value of $^H is saved, but its value is left unchanged. +When the compilation of the block is completed, it regains the saved value. +Between the points where its value is saved and restored, code that +executes within BEGIN blocks is free to change the value of $^H. + +This behavior provides the semantic of lexical scoping, and is used in, +for instance, the C<use strict> pragma.
+ +The contents should be an integer; different bits of it are used for +different pragmatic flags. Here's an example: + + sub add_100 { $^H |= 0x100 } + + sub foo { + BEGIN { add_100() } + bar->baz($boon); + } + +Consider what happens during execution of the BEGIN block. At this point +the BEGIN block has already been compiled, but the body of foo() is still +being compiled. The new value of $^H will therefore be visible only while +the body of foo() is being compiled. + +Substitution of the above BEGIN block with: + + BEGIN { require strict; strict->import('vars') } + +demonstrates how C<use strict 'vars'> is implemented. Here's a conditional +version of the same lexical pragma: + + BEGIN { require strict; strict->import('vars') if $condition } + +=item %^H + +WARNING: This variable is strictly for internal use only. Its availability, +behavior, and contents are subject to change without notice. + +The %^H hash provides the same scoping semantic as $^H. This makes it +useful for implementation of lexically scoped pragmas. =item $INPLACE_EDIT @@ -695,17 +806,18 @@ inplace editing. (Mnemonic: value of B<-i> switch.) =item $^M -By default, running out of memory it is not trappable. However, if -compiled for this, Perl may use the contents of C<$^M> as an emergency -pool after die()ing with this message. Suppose that your Perl were -compiled with -DPERL_EMERGENCY_SBRK and used Perl's malloc. Then +By default, running out of memory is an untrappable, fatal error. +However, if suitably built, Perl can use the contents of C<$^M> +as an emergency memory pool after die()ing. Suppose that your Perl +were compiled with -DPERL_EMERGENCY_SBRK and used Perl's malloc. +Then - $^M = 'a' x (1<<16); + $^M = 'a' x (1 << 16); -would allocate a 64K buffer for use when in emergency. See the F<INSTALL> -file for information on how to enable this option. As a disincentive to -casual use of this advanced feature, there is no L<English> long name for -this variable. 
+would allocate a 64K buffer for use when in emergency. See the +F<INSTALL> file in the Perl distribution for information on how to +enable this option. To discourage casual use of this advanced +feature, there is no L<English> long name for this variable. =item $OSNAME @@ -713,14 +825,15 @@ this variable. The name of the operating system under which this copy of Perl was built, as determined during the configuration process. The value -is identical to C<$Config{'osname'}>. +is identical to C<$Config{'osname'}>. See also L<Config> and the +B<-V> command-line switch documented in L<perlrun>. =item $PERLDB =item $^P -The internal variable for debugging support. Different bits mean the -following (subject to change): +The internal variable for debugging support. The meanings of the +various bits are subject to change, but currently indicate: =over 6 @@ -748,65 +861,128 @@ Keep info about source lines on which a subroutine is defined. Start with single-step on. +=item 0x40 + +Use subroutine address instead of name when reporting. + +=item 0x80 + +Report C<goto &subroutine> as well. + +=item 0x100 + +Provide informative "file" names for evals based on the place they were compiled. + +=item 0x200 + +Provide informative names to anonymous subroutines based on the place they +were compiled. + =back -Note that some bits may be relevant at compile-time only, some at -run-time only. This is a new mechanism and the details may change. +Some bits may be relevant at compile-time only, some at +run-time only. This is a new mechanism and the details may change. + +=item $LAST_REGEXP_CODE_RESULT =item $^R -The result of evaluation of the last successful L<perlre/C<(?{ code })>> -regular expression assertion. (Excluding those used as switches.) May -be written to. +The result of evaluation of the last successful C<(?{ code })> +regular expression assertion (see L<perlre>). May be written to. + +=item $EXCEPTIONS_BEING_CAUGHT =item $^S Current state of the interpreter. 
Undefined if parsing of the current module/eval is not finished (may happen in $SIG{__DIE__} and -$SIG{__WARN__} handlers). True if inside an eval, otherwise false. +$SIG{__WARN__} handlers). True if inside an eval(), otherwise false. =item $BASETIME =item $^T -The time at which the script began running, in seconds since the +The time at which the program began running, in seconds since the epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, -and B<-C> filetests are -based on this value. +and B<-C> filetests are based on this value. + +=item $PERL_VERSION + +=item $^V + +The revision, version, and subversion of the Perl interpreter, represented +as a string composed of characters with those ordinals. Thus in Perl v5.6.0 +it equals C<chr(5) . chr(6) . chr(0)> and will return true for +C<$^V eq v5.6.0>. Note that the characters in this string value can +potentially be in Unicode range. + +This can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: use ^V for Version +Control.) Example: + + warn "No \"our\" declarations!\n" if $^V and $^V lt v5.6.0; + +See the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the running Perl interpreter is too old. + +See also C<$]> for an older representation of the Perl version. =item $WARNING =item $^W -The current value of the warning switch, either TRUE or FALSE. -(Mnemonic: related to the B<-w> switch.) +The current value of the warning switch, initially true if B<-w> +was used, false otherwise, but directly modifiable. (Mnemonic: +related to the B<-w> switch.) See also L<warnings>. + +=item ${^WARNING_BITS} + +The current set of warning checks enabled by the C<use warnings> pragma. +See the documentation of C<warnings> for more details. + +=item ${^WIDE_SYSTEM_CALLS} + +Global flag that enables system calls made by Perl to use wide character +APIs native to the system, if available.
This is currently only implemented +on the Windows platform. + +This can also be enabled from the command line using the C<-C> switch. + +The initial value is typically C<0> for compatibility with Perl versions +earlier than 5.6, but may be automatically set to C<1> by Perl if the system +provides a user-settable default (e.g., C<$ENV{LC_CTYPE}>). + +The C<bytes> pragma always overrides the effect of this flag in the current +lexical scope. See L<bytes>. =item $EXECUTABLE_NAME =item $^X The name that the Perl binary itself was executed as, from C's C<argv[0]>. +This may not be a full pathname, nor even necessarily in your path. =item $ARGV -contains the name of the current file when reading from E<lt>E<gt>. +contains the name of the current file when reading from <>. =item @ARGV -The array @ARGV contains the command line arguments intended for the -script. Note that C<$#ARGV> is the generally number of arguments minus -one, because C<$ARGV[0]> is the first argument, I<NOT> the command name. See -"C<$0>" for the command name. +The array @ARGV contains the command-line arguments intended for +the script. C<$#ARGV> is generally the number of arguments minus +one, because C<$ARGV[0]> is the first argument, I<not> the program's +command name itself. See C<$0> for the command name. =item @INC -The array @INC contains the list of places to look for Perl scripts to -be evaluated by the C<do EXPR>, C<require>, or C<use> constructs. It -initially consists of the arguments to any B<-I> command line switches, -followed by the default Perl library, probably F</usr/local/lib/perl>, -followed by ".", to represent the current directory. If you need to -modify this at runtime, you should use the C<use lib> pragma -to get the machine-dependent library properly loaded also: +The array @INC contains the list of places that the C<do EXPR>, +C<require>, or C<use> constructs look for their library files. 
It +initially consists of the arguments to any B<-I> command-line +switches, followed by the default Perl library, probably +F</usr/local/lib/perl>, followed by ".", to represent the current +directory. If you need to modify this at runtime, you should use +the C<use lib> pragma to get the machine-dependent library properly +loaded also: use lib '/mypath/libdir/'; use SomeMod; @@ -814,29 +990,30 @@ to get the machine-dependent library properly loaded also: =item @_ Within a subroutine the array @_ contains the parameters passed to that -subroutine. See L<perlsub>. +subroutine. See L<perlsub>. =item %INC -The hash %INC contains entries for each filename that has -been included via C<do> or C<require>. The key is the filename you -specified, and the value is the location of the file actually found. -The C<require> command uses this array to determine whether a given file -has already been included. +The hash %INC contains entries for each filename included via the +C<do>, C<require>, or C<use> operators. The key is the filename +you specified (with module names converted to pathnames), and the +value is the location of the file found. The C<require> +operator uses this hash to determine whether a particular file has +already been included. =item %ENV =item $ENV{expr} The hash %ENV contains your current environment. Setting a -value in C<ENV> changes the environment for child processes. +value in C<ENV> changes the environment for any child processes +you subsequently fork() off. =item %SIG =item $SIG{expr} -The hash %SIG is used to set signal handlers for various -signals. Example: +The hash %SIG contains signal handlers for signals. For example: sub handler { # 1st argument is signal name my($sig) = @_; @@ -848,30 +1025,27 @@ signals. Example: $SIG{'INT'} = \&handler; $SIG{'QUIT'} = \&handler; ... 
- $SIG{'INT'} = 'DEFAULT'; # restore default action + $SIG{'INT'} = 'DEFAULT'; # restore default action $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT Using a value of C<'IGNORE'> usually has the effect of ignoring the signal, except for the C<CHLD> signal. See L<perlipc> for more about this special case. -The %SIG array contains values for only the signals actually set within -the Perl script. Here are some other examples: +Here are some other examples: - $SIG{"PIPE"} = Plumber; # SCARY!! $SIG{"PIPE"} = "Plumber"; # assumes main::Plumber (not recommended) $SIG{"PIPE"} = \&Plumber; # just fine; assume current Plumber + $SIG{"PIPE"} = *Plumber; # somewhat esoteric $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return?? -The one marked scary is problematic because it's a bareword, which means -sometimes it's a string representing the function, and sometimes it's -going to call the subroutine call right then and there! Best to be sure -and quote it or take a reference to it. *Plumber works too. See L<perlsub>. +Be sure not to use a bareword as the name of a signal handler, +lest you inadvertently call it. If your system has the sigaction() function then signal handlers are installed using it. This means you get reliable signal handling. If your system has the SA_RESTART flag it is used when signals handlers are -installed. This means that system calls for which it is supported +installed. This means that system calls for which restarting is supported continue rather than returning when a signal arrives. If you want your system calls to be interrupted by signal delivery then do something like this: @@ -902,16 +1076,20 @@ unless the hook routine itself exits via a C<goto>, a loop exit, or a die(). The C<__DIE__> handler is explicitly disabled during the call, so that you can die from a C<__DIE__> handler. Similarly for C<__WARN__>. -Note that the C<$SIG{__DIE__}> hook is called even inside eval()ed -blocks/strings. 
See L<perlfunc/die> and L<perlvar/$^S> for how to -circumvent this. - -Note that C<__DIE__>/C<__WARN__> handlers are very special in one -respect: they may be called to report (probable) errors found by the -parser. In such a case the parser may be in inconsistent state, so -any attempt to evaluate Perl code from such a handler will probably -result in a segfault. This means that calls which result/may-result -in parsing Perl should be used with extreme caution, like this: +Due to an implementation glitch, the C<$SIG{__DIE__}> hook is called +even inside an eval(). Do not use this to rewrite a pending exception +in C<$@>, or as a bizarre substitute for overriding CORE::GLOBAL::die(). +This strange action at a distance may be fixed in a future release +so that C<$SIG{__DIE__}> is only called if your program is about +to exit, as was the original intent. Any other use is deprecated. + +C<__DIE__>/C<__WARN__> handlers are very special in one respect: +they may be called to report (probable) errors found by the parser. +In such a case the parser may be in an inconsistent state, so any +attempt to evaluate Perl code from such a handler will probably +result in a segfault. This means that calls that result or may +result in parsing Perl should be used with extreme caution, like +this: require Carp if defined $^S; Carp::confess("Something wrong") if defined &Carp::confess; @@ -923,83 +1101,94 @@ called the handler. The second line will print backtrace and die if Carp was available. The third line will be executed only if Carp was not available. -See L<perlfunc/die>, L<perlfunc/warn> and L<perlfunc/eval> for -additional info. +See L<perlfunc/die>, L<perlfunc/warn>, L<perlfunc/eval>, and +L<warnings> for additional information. =back =head2 Error Indicators -The variables L<$@>, L<$!>, L<$^E>, and L<$?> contain information about -different types of error conditions that may appear during execution of -Perl script. +The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information +about different types of error conditions that may appear during +execution of a Perl program.
The variables are shown ordered by the "distance" between -the subsystem which reported the error and the Perl process, and -correspond to errors detected by the Perl interpreter, C library, -operating system, or an external program, respectively. +The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information +about different types of error conditions that may appear during +execution of a Perl program. The variables are shown ordered by +the "distance" between the subsystem which reported the error and +the Perl process. They correspond to errors detected by the Perl +interpreter, C library, operating system, or an external program, +respectively. To illustrate the differences between these variables, consider the -following Perl expression: +following Perl expression, which uses a single-quoted string: - eval ' - open PIPE, "/cdrom/install |"; - @res = <PIPE>; - close PIPE or die "bad pipe: $?, $!"; - '; + eval q{ + open PIPE, "/cdrom/install |"; + @res = <PIPE>; + close PIPE or die "bad pipe: $?, $!"; + }; After execution of this statement all 4 variables may have been set. -$@ is set if the string to be C<eval>-ed did not compile (this may happen if -C<open> or C<close> were imported with bad prototypes), or if Perl -code executed during evaluation die()d (either implicitly, say, -if C<open> was imported from module L<Fatal>, or the C<die> after -C<close> was triggered). In these cases the value of $@ is the compile -error, or C<Fatal> error (which will interpolate C<$!>!), or the argument -to C<die> (which will interpolate C<$!> and C<$?>!). - -When the above expression is executed, open(), C<<PIPEE<gt>>, and C<close> -are translated to C run-time library calls. $! is set if one of these -calls fails. The value is a symbolic indicator chosen by the C run-time -library, say C<No such file or directory>. - -On some systems the above C library calls are further translated -to calls to the kernel. 
The kernel may have set more verbose error -indicator that one of the handful of standard C errors. In such cases $^E -contains this verbose error indicator, which may be, say, C<CDROM tray not -closed>. On systems where C library calls are identical to system calls -$^E is a duplicate of $!. - -Finally, $? may be set to non-C<0> value if the external program -C</cdrom/install> fails. Upper bits of the particular value may reflect -specific error conditions encountered by this program (this is -program-dependent), lower-bits reflect mode of failure (segfault, completion, -etc.). Note that in contrast to $@, $!, and $^E, which are set only -if error condition is detected, the variable $? is set on each C<wait> or -pipe C<close>, overwriting the old value. - -For more details, see the individual descriptions at L<$@>, L<$!>, L<$^E>, -and L<$?>. +C<$@> is set if the string to be C<eval>-ed did not compile (this +may happen if C<open> or C<close> were imported with bad prototypes), +or if Perl code executed during evaluation die()d. In these cases +the value of $@ is the compile error, or the argument to C<die> +(which will interpolate C<$!> and C<$?>!). (See also L<Fatal>, +though.) + +When the eval() expression above is executed, open(), C<< <PIPE> >>, +and C<close> are translated to calls in the C run-time library and +thence to the operating system kernel. C<$!> is set to the C library's +C<errno> if one of these calls fails. + +Under a few operating systems, C<$^E> may contain a more verbose +error indicator, such as in this case, "CDROM tray not closed." +Systems that do not support extended error messages leave C<$^E> +the same as C<$!>. + +Finally, C<$?> may be set to a non-0 value if the external program +F</cdrom/install> fails. The upper eight bits reflect specific +error conditions encountered by the program (the program's exit() +value). The lower eight bits reflect the mode of failure, like signal +death and core dump information. See wait(2) for details.
In +contrast to C<$!> and C<$^E>, which are set only if an error condition +is detected, the variable C<$?> is set on each C<wait> or pipe +C<close>, overwriting the old value. This is more like C<$@>, which +on every eval() is always set on failure and cleared on success. +For more details, see the individual descriptions at C<$@>, C<$!>, C<$^E>, +and C<$?>. =head2 Technical Note on the Syntax of Variable Names -Variable names in Perl can have several formats. Usually, they must -begin with a letter or underscore, in which case they can be -arbitrarily long (up to an internal limit of 256 characters) and may -contain letters, digits, underscores, or the special sequence C<::>. -In this case the part before the last C<::> is taken to be a I<package -qualifier>; see L<perlmod>. +Variable names in Perl can have several formats. Usually, they +must begin with a letter or underscore, in which case they can be +arbitrarily long (up to an internal limit of 251 characters) and +may contain letters, digits, underscores, or the special sequence +C<::> or C<'>. In this case, the part before the last C<::> or +C<'> is taken to be a I<package qualifier>; see L<perlmod>. Perl variable names may also be a sequence of digits or a single punctuation or control character. These names are all reserved for -special uses by Perl; for example, the all-digits names are used to -hold backreferences after a regular expression match. Perl has a -special syntax for the single-control-character names: It understands -C<^X> (caret C<X>) to mean the control-C<X> character. For example, -the notation C<$^W> (dollar-sign caret C<W>) is the scalar variable -whose name is the single character control-C<W>. This is better than -typing a literal control-C<W> into your program. - -All Perl variables that begin with digits, control characters, or +special uses by Perl; for example, the all-digits names are used +to hold data captured by backreferences after a regular expression +match.
Perl has a special syntax for the single-control-character +names: It understands C<^X> (caret C<X>) to mean the control-C<X> +character. For example, the notation C<$^W> (dollar-sign caret +C<W>) is the scalar variable whose name is the single character +control-C<W>. This is better than typing a literal control-C<W> +into your program. + +Finally, new in Perl 5.6, Perl variable names may be alphanumeric +strings that begin with control characters (or better yet, a caret). +These variables must be written in the form C<${^Foo}>; the braces +are not optional. C<${^Foo}> denotes the scalar variable whose +name is a control-C<F> followed by two C<o>'s. These variables are +reserved for future special uses by Perl, except for the ones that +begin with C<^_> (control-underscore or caret-underscore). No +control-character name that begins with C<^_> will acquire a special +meaning in any future version of Perl; such names may therefore be +used safely in programs. C<$^_> itself, however, I<is> reserved. + +Perl identifiers that begin with digits, control characters, or punctuation characters are exempt from the effects of the C<package> declaration and are always forced to be in package C<main>. A few other names are also exempt: @@ -1010,3 +1199,22 @@ other names are also exempt: ARGVOUT SIG +In particular, the new special C<${^_XYZ}> variables are always taken +to be in package C<main>, regardless of any C<package> declarations +presently in scope. + +=head1 BUGS + +Due to an unfortunate accident of Perl's implementation, C<use +English> imposes a considerable performance penalty on all regular +expression matches in a program, regardless of whether they occur +in the scope of C<use English>. For that reason, saying C<use +English> in libraries is strongly discouraged. See the +Devel::SawAmpersand module documentation from CPAN +(http://www.perl.com/CPAN/modules/by-module/Devel/) +for more information. 
+ +Having to even think about the C<$^S> variable in your exception +handlers is simply wrong. C<$SIG{__DIE__}> as currently implemented +invites grievous and difficult to track down errors. Avoid it +and use an C<END{}> or CORE::GLOBAL::die override instead. diff --git a/contrib/perl5/pod/perlxs.pod b/contrib/perl5/pod/perlxs.pod index 98a9834..3c0927e 100644 --- a/contrib/perl5/pod/perlxs.pod +++ b/contrib/perl5/pod/perlxs.pod @@ -6,28 +6,73 @@ perlxs - XS language reference manual =head2 Introduction -XS is a language used to create an extension interface -between Perl and some C library which one wishes to use with -Perl. The XS interface is combined with the library to -create a new library which can be linked to Perl. An B<XSUB> -is a function in the XS language and is the core component -of the Perl application interface. - -The XS compiler is called B<xsubpp>. This compiler will embed -the constructs necessary to let an XSUB, which is really a C -function in disguise, manipulate Perl values and creates the -glue necessary to let Perl access the XSUB. The compiler +XS is an interface description file format used to create an extension +interface between Perl and C code (or a C library) which one wishes +to use with Perl. The XS interface is combined with the library to +create a new library which can then be either dynamically loaded +or statically linked into perl. The XS interface description is +written in the XS language and is the core component of the Perl +extension interface. + +An B<XSUB> forms the basic unit of the XS interface. After compilation +by the B<xsubpp> compiler, each XSUB amounts to a C function definition +which will provide the glue between Perl calling conventions and C +calling conventions. + +The glue code pulls the arguments from the Perl stack, converts these +Perl values to the formats expected by a C function, calls this C function, and +transfers the return values of the C function back to Perl.
+Return values here may be a conventional C return value or any C +function arguments that may serve as output parameters. These return +values may be passed back to Perl either by putting them on the +Perl stack, or by modifying the arguments supplied from the Perl side. + +The above is a somewhat simplified view of what really happens. Since +Perl allows more flexible calling conventions than C, XSUBs may do much +more in practice, such as checking input parameters for validity, +throwing exceptions (or returning undef/empty list) if the return value +from the C function indicates failure, calling different C functions +based on numbers and types of the arguments, providing an object-oriented +interface, etc. + +Of course, one could write such glue code directly in C. However, this +would be a tedious task, especially if one needs to write glue for +multiple C functions, and/or one is not familiar enough with the Perl +stack discipline and other such arcana. XS comes to the rescue here: +instead of writing this glue C code in long-hand, one can write +a more concise short-hand I<description> of what should be done by +the glue, and let the XS compiler B<xsubpp> handle the rest. + +The XS language allows one to describe the mapping between how the C +routine is used, and how the corresponding Perl routine is used. It +also allows creation of Perl routines which are directly translated to +C code and which are not related to a pre-existing C function. In cases +when the C interface coincides with the Perl interface, the XSUB +declaration is almost identical to a declaration of a C function (in K&R +style). In such circumstances, there is another tool called C<h2xs> +that is able to translate an entire C header file into a corresponding +XS file that will provide glue to the functions/macros described in +the header file. + +The XS compiler is called B<xsubpp>. 
This compiler creates +the constructs necessary to let an XSUB manipulate Perl values, and +creates the glue necessary to let Perl call the XSUB. The compiler uses B<typemaps> to determine how to map C function parameters -and variables to Perl values. The default typemap handles -many common C types. A supplement typemap must be created -to handle special structures and types for the library being -linked. +and output values to Perl values and back. The default typemap +(which comes with Perl) handles many common C types. A supplementary +typemap may also be needed to handle any special structures and types +for the library being linked. + +A file in XS format starts with a C language section which goes until the +first C<MODULE =Z<>> directive. Other XS directives and XSUB definitions +may follow this line. The "language" used in this part of the file +is usually referred to as the XS language. See L<perlxstut> for a tutorial on the whole extension creation process. -Note: For many extensions, Dave Beazley's SWIG system provides a -significantly more convenient mechanism for creating the XS glue -code. See L<http://www.cs.utah.edu/~beazley/SWIG> for more +Note: For some extensions, Dave Beazley's SWIG system may provide a +significantly more convenient mechanism for creating the extension glue +code. See L<http://www.swig.org> for more information. =head2 On The Road @@ -76,7 +121,7 @@ expanded later in this document. rpcb_gettime(host,timep) char *host time_t &timep - OUTPUT: + OUTPUT: timep Any extension to Perl, including those containing XSUBs, @@ -110,6 +155,10 @@ function. =head2 The Anatomy of an XSUB +The simplest XSUBs consist of 3 parts: a description of the return +value, the name of the XSUB routine and the names of its arguments, +and a description of types or formats of the arguments. + The following XSUB allows a Perl program to access a C library function called sin(). 
The XSUB will imitate the C function which takes a single argument and returns a single value. @@ -118,14 +167,24 @@ argument and returns a single value. sin(x) double x -When using C pointers the indirection operator C<*> should be considered -part of the type and the address operator C<&> should be considered part of -the variable, as is demonstrated in the rpcb_gettime() function above. See -the section on typemaps for more about handling qualifiers and unary +When using parameters with C pointer types, as in + + double string_to_double(char *s); + +there may be two ways to describe this argument to B<xsubpp>: + + char * s + char &s + +Both these XS declarations correspond to the C<char*> C type, but they have +different semantics. It is convenient to think that the indirection operator +C<*> should be considered as a part of the type and the address operator C<&> +should be considered part of the variable. See L<"The Typemap"> and +L<"The & Unary Operator"> for more info about handling qualifiers and unary operators in C types. The function name and the return type must be placed on -separate lines. +separate lines and should be flush left-adjusted. INCORRECT CORRECT @@ -135,7 +194,7 @@ separate lines. The function body may be indented or left-adjusted. The following example shows a function with its body left-adjusted. Most examples in this -document will indent the body. +document will indent the body for better readability. CORRECT @@ -143,13 +202,23 @@ document will indent the body. sin(x) double x +More complicated XSUBs may contain many other sections. Each section of +an XSUB starts with the corresponding keyword, such as INIT: or CLEANUP:. +However, the first two lines of an XSUB always contain the same data: +descriptions of the return type and the names of the function and its +parameters. Whatever immediately follows these is considered to be +an INPUT: section unless explicitly marked with another keyword. +(See L<The INPUT: Keyword>.) 
+ +An XSUB section continues until another section-start keyword is found. + =head2 The Argument Stack -The argument stack is used to store the values which are +The Perl argument stack is used to store the values which are sent as parameters to the XSUB and to store the XSUB's -return value. In reality all Perl functions keep their -values on this stack at the same time, each limited to its -own range of positions on the stack. In this document the +return value(s). In reality all Perl functions (including non-XSUB +ones) keep their values on this stack at the same time, each limited +to its own range of positions on the stack. In this document the first position on that stack which belongs to the active function will be referred to as position 0 for that function. @@ -163,17 +232,19 @@ typemaps. In more complex cases the programmer must supply the code. =head2 The RETVAL Variable -The RETVAL variable is a magic variable which always matches -the return type of the C library function. The B<xsubpp> compiler will -supply this variable in each XSUB and by default will use it to hold the -return value of the C library function being called. In simple cases the -value of RETVAL will be placed in ST(0) of the argument stack where it can -be received by Perl as the return value of the XSUB. +The RETVAL variable is a special C variable that is declared automatically +for you. The C type of RETVAL matches the return type of the C library +function. The B<xsubpp> compiler will declare this variable in each XSUB +with non-C<void> return type. By default the generated C function +will use RETVAL to hold the return value of the C library function being +called. In simple cases the value of RETVAL will be placed in ST(0) of +the argument stack where it can be received by Perl as the return value +of the XSUB. If the XSUB has a return type of C<void> then the compiler will -not supply a RETVAL variable for that function.
When using -the PPCODE: directive the RETVAL variable is not needed, unless used -explicitly. +not declare a RETVAL variable for that function. When using +a PPCODE: section no manipulation of the RETVAL variable is required, the +section may use direct stack manipulation to place output values on the stack. If PPCODE: directive is not used, C<void> return value should be used only for subroutines which do not return a value, I<even if> CODE: @@ -248,8 +319,9 @@ keyword. The OUTPUT: keyword indicates that certain function parameters should be updated (new values made visible to Perl) when the XSUB terminates or that certain values should be returned to the calling Perl function. For -simple functions, such as the sin() function above, the RETVAL variable is -automatically designated as an output value. In more complex functions +simple functions which have no CODE: or PPCODE: section, +such as the sin() function above, the RETVAL variable is +automatically designated as an output value. For more complex functions the B<xsubpp> compiler will need help to determine which variables are output variables. @@ -268,7 +340,7 @@ be seen by Perl. rpcb_gettime(host,timep) char *host time_t &timep - OUTPUT: + OUTPUT: timep The OUTPUT: keyword will also allow an output parameter to @@ -279,7 +351,7 @@ typemap. rpcb_gettime(host,timep) char *host time_t &timep - OUTPUT: + OUTPUT: timep sv_setnv(ST(1), (double)timep); B<xsubpp> emits an automatic C<SvSETMAGIC()> for all parameters in the @@ -297,8 +369,8 @@ about 'set' magic. This keyword is used in more complicated XSUBs which require special handling for the C function. The RETVAL variable is -available but will not be returned unless it is specified -under the OUTPUT: keyword. +still declared, but it will not be returned unless it is specified +in the OUTPUT: section. The following XSUB is for a C function which requires special handling of its parameters. The Perl usage is given first. @@ -311,9 +383,9 @@ The XSUB follows. 
rpcb_gettime(host,timep) char *host time_t timep - CODE: + CODE: RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + OUTPUT: timep RETVAL @@ -327,11 +399,24 @@ above, this keyword does not affect the way the compiler handles RETVAL. rpcb_gettime(host,timep) char *host time_t &timep - INIT: + INIT: printf("# Host is %s\n", host ); - OUTPUT: + OUTPUT: timep +Another use for the INIT: section is to check for preconditions before +making a call to the C function: + + long long + lldiv(a,b) + long long a + long long b + INIT: + if (a == 0 && b == 0) + XSRETURN_UNDEF; + if (b == 0) + croak("lldiv: cannot divide by 0"); + =head2 The NO_INIT Keyword The NO_INIT keyword is used to indicate that a function @@ -351,30 +436,34 @@ not care about its initial contents. rpcb_gettime(host,timep) char *host time_t &timep = NO_INIT - OUTPUT: + OUTPUT: timep =head2 Initializing Function Parameters -Function parameters are normally initialized with their -values from the argument stack. The typemaps contain the -code segments which are used to transfer the Perl values to +C function parameters are normally initialized with their values from +the argument stack (which in turn contains the parameters that were +passed to the XSUB from Perl). The typemaps contain the +code segments which are used to translate the Perl values to the C parameters. The programmer, however, is allowed to override the typemaps and supply alternate (or additional) -initialization code. +initialization code. Initialization code starts with the first +C<=>, C<;> or C<+> on a line in the INPUT: section. The only +exception happens if this C<;> terminates the line, then this C<;> +is quietly ignored. The following code demonstrates how to supply initialization code for function parameters. The initialization code is eval'd within double quotes by the compiler before it is added to the output so anything which should be interpreted literally [mainly C<$>, C<@>, or C<\\>] -must be protected with backslashes. 
The variables C<$var>, C<$arg>, -and C<$type> can be used as in typemaps. +must be protected with backslashes. The variables $var, $arg, +and $type can be used as in typemaps. bool_t rpcb_gettime(host,timep) char *host = (char *)SvPV($arg,PL_na); time_t &timep = 0; - OUTPUT: + OUTPUT: timep This should not be used to supply default values for parameters. One @@ -382,36 +471,47 @@ would normally use this when a function parameter must be processed by another library function before it can be used. Default parameters are covered in the next section. -If the initialization begins with C<=>, then it is output on -the same line where the input variable is declared. If the -initialization begins with C<;> or C<+>, then it is output after -all of the input variables have been declared. The C<=> and C<;> -cases replace the initialization normally supplied from the typemap. -For the C<+> case, the initialization from the typemap will precede -the initialization code included after the C<+>. A global +If the initialization begins with C<=>, then it is output in +the declaration for the input variable, replacing the initialization +supplied by the typemap. If the initialization +begins with C<;> or C<+>, then it is performed after +all of the input variables have been declared. In the C<;> +case the initialization normally supplied by the typemap is not performed. +For the C<+> case, the declaration for the variable will include the +initialization from the typemap. A global variable, C<%v>, is available for the truly rare case where information from one initialization is needed in another initialization. +Here's a truly obscure example: + bool_t rpcb_gettime(host,timep) - time_t &timep ; /*\$v{time}=@{[$v{time}=$arg]}*/ - char *host + SvOK($v{time}) ? SvPV($arg,PL_na) : NULL; - OUTPUT: + time_t &timep ; /* \$v{timep}=@{[$v{timep}=$arg]} */ + char *host + SvOK($v{timep}) ? 
SvPV($arg,PL_na) : NULL; + OUTPUT: timep +The construct C<\$v{timep}=@{[$v{timep}=$arg]}> used in the above +example has a two-fold purpose: first, when this line is processed by +B<xsubpp>, the Perl snippet C<$v{timep}=$arg> is evaluated. Second, +the text of the evaluated snippet is output into the generated C file +(inside a C comment)! During the processing of C<char *host> line, +$arg will evaluate to C<ST(0)>, and C<$v{timep}> will evaluate to +C<ST(1)>. + =head2 Default Parameter Values -Default values can be specified for function parameters by -placing an assignment statement in the parameter list. The -default value may be a number or a string. Defaults should +Default values for XSUB arguments can be specified by placing an +assignment statement in the parameter list. The default value may +be a number, a string or the special string C<NO_INIT>. Defaults should always be used on the right-most parameters only. To allow the XSUB for rpcb_gettime() to have a default host value the parameters to the XSUB could be rearranged. The XSUB will then call the real rpcb_gettime() function with -the parameters in the correct order. Perl will call this -XSUB with either of the following statements. +the parameters in the correct order. This XSUB can be called +from Perl with either of the following statements: $status = rpcb_gettime( $timep, $host ); @@ -425,20 +525,29 @@ the parameters in the correct order for that function. rpcb_gettime(timep,host="localhost") char *host time_t timep = NO_INIT - CODE: + CODE: RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + OUTPUT: timep RETVAL =head2 The PREINIT: Keyword -The PREINIT: keyword allows extra variables to be declared before the -typemaps are expanded. If a variable is declared in a CODE: block then that -variable will follow any typemap code. This may result in a C syntax -error. To force the variable to be declared before the typemap code, place -it into a PREINIT: block. 
The PREINIT: keyword may be used one or more -times within an XSUB. +The PREINIT: keyword allows extra variables to be declared immediately +before or after the declarations of the parameters from the INPUT: section +are emitted. + +If a variable is declared inside a CODE: section it will follow any typemap +code that is emitted for the input parameters. This may result in the +declaration ending up after C code, which is a C syntax error. Similar +errors may happen if an explicit C<;>-type or C<+>-type initialization of +parameters is used (see L<"Initializing Function Parameters">). Declaring +these variables in an INIT: section will not help. + +In such cases, to force an additional variable to be declared together +with declarations of other variables, place the declaration into a +PREINIT: section. The PREINIT: keyword may be used one or more times +within an XSUB. The following examples are equivalent, but if the code is using complex typemaps then the first example is safer. @@ -446,23 +555,79 @@ typemaps then the first example is safer. bool_t rpcb_gettime(timep) time_t timep = NO_INIT - PREINIT: + PREINIT: char *host = "localhost"; - CODE: + CODE: RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + OUTPUT: timep RETVAL -A correct, but error-prone example. +For this particular case an INIT: keyword would generate the +same C code as the PREINIT: keyword.
Another correct, but error-prone example: bool_t rpcb_gettime(timep) time_t timep = NO_INIT - CODE: + CODE: char *host = "localhost"; RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + OUTPUT: + timep + RETVAL + +Another way to declare C<host> is to use a C block in the CODE: section: + + bool_t + rpcb_gettime(timep) + time_t timep = NO_INIT + CODE: + { + char *host = "localhost"; + RETVAL = rpcb_gettime( host, &timep ); + } + OUTPUT: + timep + RETVAL + +The ability to put additional declarations before the typemap entries are +processed is very handy in the cases when typemap conversions manipulate +some global state: + + MyObject + mutate(o) + PREINIT: + MyState st = global_state; + INPUT: + MyObject o; + CLEANUP: + reset_to(global_state, st); + +Here we suppose that conversion to C<MyObject> in the INPUT: section and from +MyObject when processing RETVAL will modify a global variable C<global_state>. +After these conversions are performed, we restore the old value of +C<global_state> (to avoid memory leaks, for example). + +There is another way to trade clarity for compactness: INPUT sections allow +declaration of C variables which do not appear in the parameter list of +a subroutine. Thus the above code for mutate() can be rewritten as + + MyObject + mutate(o) + MyState st = global_state; + MyObject o; + CLEANUP: + reset_to(global_state, st); + +and the code for rpcb_gettime() can be rewritten as + + bool_t + rpcb_gettime(timep) + time_t timep = NO_INIT + char *host = "localhost"; + C_ARGS: + host, &timep + OUTPUT: timep RETVAL @@ -472,8 +637,8 @@ The SCOPE: keyword allows scoping to be enabled for a particular XSUB. If enabled, the XSUB will invoke ENTER and LEAVE automatically. To support potentially complex type mappings, if a typemap entry used -by this XSUB contains a comment like C</*scope*/> then scoping will -automatically be enabled for that XSUB. 
+by an XSUB contains a comment like C</*scope*/> then scoping will +be automatically enabled for that XSUB. To enable scoping: @@ -497,14 +662,14 @@ evaluated late, after a PREINIT. bool_t rpcb_gettime(host,timep) char *host - PREINIT: + PREINIT: time_t tt; - INPUT: + INPUT: time_t timep - CODE: + CODE: RETVAL = rpcb_gettime( host, &tt ); timep = tt; - OUTPUT: + OUTPUT: timep RETVAL @@ -512,22 +677,43 @@ The next example shows each input parameter evaluated late. bool_t rpcb_gettime(host,timep) - PREINIT: + PREINIT: time_t tt; - INPUT: + INPUT: char *host - PREINIT: + PREINIT: char *h; - INPUT: + INPUT: time_t timep - CODE: + CODE: h = host; RETVAL = rpcb_gettime( h, &tt ); timep = tt; - OUTPUT: + OUTPUT: + timep + RETVAL + +Since INPUT sections allow declaration of C variables which do not appear +in the parameter list of a subroutine, this may be shortened to: + + bool_t + rpcb_gettime(host,timep) + time_t tt; + char *host; + char *h = host; + time_t timep; + CODE: + RETVAL = rpcb_gettime( h, &tt ); + timep = tt; + OUTPUT: timep RETVAL +(We used our knowledge that input conversion for C<char *> is a "simple" one, +thus C<host> is initialized on the declaration line, and our assignment +C<h = host> is not performed too early. Otherwise one would need to have the +assignment C<h = host> in a CODE: or INIT: section.) + =head2 Variable-length Parameter Lists XSUBs can have variable-length parameter lists by specifying an ellipsis @@ -551,14 +737,14 @@ The XS code, with ellipsis, follows. bool_t rpcb_gettime(timep, ...) time_t timep = NO_INIT - PREINIT: + PREINIT: char *host = "localhost"; STRLEN n_a; - CODE: - if( items > 1 ) - host = (char *)SvPV(ST(1), n_a); - RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + CODE: + if( items > 1 ) + host = (char *)SvPV(ST(1), n_a); + RETVAL = rpcb_gettime( host, &timep ); + OUTPUT: timep RETVAL @@ -566,10 +752,10 @@ The XS code, with ellipsis, follows. 
The C_ARGS: keyword allows creating of XSUBS which have different calling sequence from Perl than from C, without a need to write -CODE: or CPPCODE: section. The contents of the C_ARGS: paragraph is +CODE: or PPCODE: section. The contents of the C_ARGS: paragraph is put as the argument to the called C function without any change. -For example, suppose that C function is declared as +For example, suppose that a C function is declared as symbolic nth_derivative(int n, symbolic function, int flags); @@ -585,7 +771,7 @@ To do this, declare the XSUB as nth_derivative(function, n) symbolic function int n - C_ARGS: + C_ARGS: n, function, default_flags =head2 The PPCODE: Keyword @@ -595,9 +781,29 @@ to tell the B<xsubpp> compiler that the programmer is supplying the code to control the argument stack for the XSUBs return values. Occasionally one will want an XSUB to return a list of values rather than a single value. In these cases one must use PPCODE: and then explicitly push the list of -values on the stack. The PPCODE: and CODE: keywords are not used +values on the stack. The PPCODE: and CODE: keywords should not be used together within the same XSUB. +The actual difference between PPCODE: and CODE: sections is in the +initialization of C<SP> macro (which stands for the I<current> Perl +stack pointer), and in the handling of data on the stack when returning +from an XSUB. In CODE: sections SP preserves the value which was on +entry to the XSUB: SP is on the function pointer (which follows the +last parameter). In PPCODE: sections SP is moved backward to the +beginning of the parameter list, which allows C<PUSH*()> macros +to place output values in the place Perl expects them to be when +the XSUB returns back to Perl. + +The generated trailer for a CODE: section ensures that the number of return +values Perl will see is either 0 or 1 (depending on the C<void>ness of the +return value of the C function, and heuristics mentioned in +L<"The RETVAL Variable">). 
The trailer generated for a PPCODE: section +is based on the number of return values and on the number of times +C<SP> was updated by C<[X]PUSH*()> macros. + +Note that macros C<ST(i)>, C<XST_m*()> and C<XSRETURN*()> work equally +well in CODE: sections and PPCODE: sections. + The following XSUB will call the C rpcb_gettime() function and will return its two output values, timep and status, to Perl as a single list. @@ -605,10 +811,10 @@ Perl as a single list. void rpcb_gettime(host) char *host - PREINIT: + PREINIT: time_t timep; bool_t status; - PPCODE: + PPCODE: status = rpcb_gettime( host, &timep ); EXTEND(SP, 2); PUSHs(sv_2mortal(newSViv(status))); @@ -659,10 +865,10 @@ the default return value. SV * rpcb_gettime(host) char * host - PREINIT: + PREINIT: time_t timep; bool_t x; - CODE: + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ) sv_setnv( ST(0), (double)timep); @@ -673,10 +879,10 @@ return value, should the need arise. SV * rpcb_gettime(host) char * host - PREINIT: + PREINIT: time_t timep; bool_t x; - CODE: + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ){ sv_setnv( ST(0), (double)timep); @@ -691,14 +897,14 @@ then not push return values on the stack. void rpcb_gettime(host) char *host - PREINIT: + PREINIT: time_t timep; - PPCODE: + PPCODE: if( rpcb_gettime( host, &timep ) ) PUSHs(sv_2mortal(newSViv(timep))); else{ - /* Nothing pushed on stack, so an empty */ - /* list is implicitly returned. */ + /* Nothing pushed on stack, so an empty + * list is implicitly returned. */ } Some people may be inclined to include an explicit C<return> in the above @@ -707,6 +913,32 @@ situations C<XSRETURN_EMPTY> should be used, instead. This will ensure that the XSUB stack is properly adjusted. Consult L<perlguts/"API LISTING"> for other C<XSRETURN> macros. 
+Since C<XSRETURN_*> macros can be used with CODE blocks as well, one can +rewrite this example as: + + int + rpcb_gettime(host) + char *host + PREINIT: + time_t timep; + CODE: + RETVAL = rpcb_gettime( host, &timep ); + if (RETVAL == 0) + XSRETURN_UNDEF; + OUTPUT: + RETVAL + +In fact, one can put this check into a CLEANUP: section as well. Together +with PREINIT: simplifications, this leads to: + + int + rpcb_gettime(host) + char *host + time_t timep; + CLEANUP: + if (RETVAL == 0) + XSRETURN_UNDEF; + =head2 The REQUIRE: Keyword The REQUIRE: keyword is used to indicate the minimum version of the @@ -784,15 +1016,15 @@ prototypes. bool_t rpcb_gettime(timep, ...) time_t timep = NO_INIT - PROTOTYPE: $;$ - PREINIT: + PROTOTYPE: $;$ + PREINIT: char *host = "localhost"; STRLEN n_a; - CODE: + CODE: if( items > 1 ) host = (char *)SvPV(ST(1), n_a); RETVAL = rpcb_gettime( host, &timep ); - OUTPUT: + OUTPUT: timep RETVAL @@ -812,12 +1044,12 @@ C<BAR::getit()> for this function. rpcb_gettime(host,timep) char *host time_t &timep - ALIAS: + ALIAS: FOO::gettime = 1 BAR::getit = 2 - INIT: + INIT: printf("# ix = %d\n", ix ); - OUTPUT: + OUTPUT: timep =head2 The INTERFACE: Keyword @@ -825,14 +1057,14 @@ C<BAR::getit()> for this function. This keyword declares the current XSUB as a keeper of the given calling signature. If some text follows this keyword, it is considered as a list of functions which have this signature, and -should be attached to XSUBs. +should be attached to the current XSUB. 
-Say, if you have 4 functions multiply(), divide(), add(), subtract() all -having the signature +For example, if you have 4 C functions multiply(), divide(), add(), +subtract() all having the signature: symbolic f(symbolic, symbolic); -you code them all by using XSUB +you can make them all use the same XSUB using this: symbolic interface_s_ss(arg1, arg2) @@ -842,16 +1074,21 @@ you code them all by using XSUB multiply divide add subtract -The advantage of this approach comparing to ALIAS: keyword is that one +(This is the complete XSUB code for 4 Perl functions!) The four generated +Perl functions share names with the corresponding C functions. + +The advantage of this approach compared to the ALIAS: keyword is that there +is no need to code a switch statement, since each Perl function (which shares +the same XSUB) knows which C function it should call. Additionally, one can attach an extra function remainder() at runtime by using - + CV *mycv = newXSproto("Symbolic::remainder", XS_Symbolic_interface_s_ss, __FILE__, "$$"); XSINTERFACE_FUNC_SET(mycv, remainder); -(This example supposes that there was no INTERFACE_MACRO: section, -otherwise one needs to use something else instead of -C<XSINTERFACE_FUNC_SET>.) +say, from another XSUB. (This example supposes that there was no +INTERFACE_MACRO: section, otherwise one needs to use something else instead of +C<XSINTERFACE_FUNC_SET>, see the next section.) =head2 The INTERFACE_MACRO: Keyword @@ -882,10 +1119,10 @@ in C section, interface_s_ss(arg1, arg2) symbolic arg1 symbolic arg2 - INTERFACE_MACRO: + INTERFACE_MACRO: XSINTERFACE_FUNC_BYOFFSET XSINTERFACE_FUNC_BYOFFSET_set - INTERFACE: + INTERFACE: multiply divide add subtract @@ -903,7 +1140,7 @@ The file F<Rpcb1.xsh> contains our C<rpcb_gettime()> function: rpcb_gettime(host,timep) char *host time_t &timep - OUTPUT: + OUTPUT: timep The XS module can use INCLUDE: to pull that file into it. @@ -936,22 +1173,22 @@ reversed, C<(time_t *timep, char *host)>.
long rpcb_gettime(a,b) CASE: ix == 1 - ALIAS: + ALIAS: x_gettime = 1 - INPUT: + INPUT: # 'a' is timep, 'b' is host char *b time_t a = NO_INIT - CODE: + CODE: RETVAL = rpcb_gettime( b, &a ); - OUTPUT: + OUTPUT: a RETVAL CASE: # 'a' is host, 'b' is timep char *a time_t &b = NO_INIT - OUTPUT: + OUTPUT: b RETVAL @@ -964,12 +1201,15 @@ the different argument lists. =head2 The & Unary Operator -The & unary operator is used to tell the compiler that it should dereference -the object when it calls the C function. This is used when a CODE: block is -not used and the object is a not a pointer type (the object is an C<int> or -C<long> but not a C<int*> or C<long*>). +The C<&> unary operator in the INPUT: section is used to tell B<xsubpp> +that it should convert a Perl value to/from C using the C type to the left +of C<&>, but provide a pointer to this value when the C function is called. -The following XSUB will generate incorrect C code. The xsubpp compiler will +This is useful to avoid a CODE: block for a C function which takes a parameter +by reference. Typically, the parameter should be not a pointer type (an +C<int> or C<long> but not a C<int*> or C<long*>). + +The following XSUB will generate incorrect C code. The B<xsubpp> compiler will turn this into code which calls C<rpcb_gettime()> with parameters C<(char *host, time_t timep)>, but the real C<rpcb_gettime()> wants the C<timep> parameter to be of type C<time_t*> rather than C<time_t>. @@ -978,10 +1218,10 @@ parameter to be of type C<time_t*> rather than C<time_t>. rpcb_gettime(host,timep) char *host time_t timep - OUTPUT: + OUTPUT: timep -That problem is corrected by using the C<&> operator. The xsubpp compiler +That problem is corrected by using the C<&> operator. The B<xsubpp> compiler will now turn this into code which calls C<rpcb_gettime()> correctly with parameters C<(char *host, time_t *timep)>. It does this by carrying the C<&> through, so the function call looks like C<rpcb_gettime(host, &timep)>. 
@@ -990,7 +1230,7 @@ C<&> through, so the function call looks like C<rpcb_gettime(host, &timep)>. rpcb_gettime(host,timep) char *host time_t &timep - OUTPUT: + OUTPUT: timep =head2 Inserting Comments and C Preprocessor Directives @@ -1021,13 +1261,14 @@ and not #if ... version2 #endif -because otherwise xsubpp will believe that you made a duplicate +because otherwise B<xsubpp> will believe that you made a duplicate definition of the function. Also, put a blank line before the #else/#endif so it will not be seen as part of the function body. =head2 Using XS With C++ -If a function is defined as a C++ method then it will assume +If an XSUB name contains C<::>, it is considered to be a C++ method. +The generated Perl function will assume that its first argument is an object pointer. The object pointer will be stored in a variable called THIS. The object should have been created by C++ with the new() function and should @@ -1035,7 +1276,8 @@ be blessed by Perl with the sv_setref_pv() macro. The blessing of the object by Perl can be handled by a typemap. An example typemap is shown at the end of this section. -If the method is defined as static it will call the C++ +If the return type of the XSUB includes C<static>, the method is considered +to be a static method. It will call the C++ function using the class::method() syntax. If the method is not static the function will be called using the THIS-E<gt>method() syntax. @@ -1063,22 +1305,37 @@ not listed. color::set_blue( val ) int val -Both functions will expect an object as the first parameter. The xsubpp -compiler will call that object C<THIS> and will use it to call the specified -method. So in the C++ code the blue() and set_blue() methods will be called -in the following manner. +Both Perl functions will expect an object as the first parameter. In the +generated C++ code the object is called C<THIS>, and the method call will +be performed on this object. 
So in the C++ code the blue() and set_blue() +methods will be called like this: RETVAL = THIS->blue(); THIS->set_blue( val ); +You could also write a single get/set method using an optional argument: + + int + color::blue( val = NO_INIT ) + int val + PROTOTYPE: $;$ + CODE: + if (items > 1) + THIS->set_blue( val ); + RETVAL = THIS->blue(); + OUTPUT: + RETVAL + If the function's name is B<DESTROY> then the C++ C<delete> function will be -called and C<THIS> will be given as its parameter. +called and C<THIS> will be given as its parameter. The generated C++ code for void color::DESTROY() -The C++ code will call C<delete>. +will look like this: + + color *THIS = ...; // Initialized as in typemap delete THIS; @@ -1090,9 +1347,9 @@ argument. color * color::new() -The C++ code will call C<new>. +The generated C++ code will call C<new>. - RETVAL = new color(); + RETVAL = new color(); The following is an example of a typemap that could be used for this C++ example. @@ -1118,30 +1375,59 @@ example. =head2 Interface Strategy When designing an interface between Perl and a C library a straight -translation from C to XS is often sufficient. The interface will often be +translation from C to XS (such as created by C<h2xs -x>) is often sufficient. +However, sometimes the interface will look very C-like and occasionally nonintuitive, especially when the C function -modifies one of its parameters. In cases where the programmer wishes to +modifies one of its parameters, or returns failure inband (as in "negative +return values mean failure"). In cases where the programmer wishes to create a more Perl-like interface the following strategy may help to identify the more critical parts of the interface. -Identify the C functions which modify their parameters. The XSUBs for -these functions may be able to return lists to Perl, or may be -candidates to return undef or an empty list in case of failure. +Identify the C functions with input/output or output parameters. 
The XSUBs for +these functions may be able to return lists to Perl. + +Identify the C functions which use some inband info as an indication +of failure. They may be +candidates to return undef or an empty list in case of failure. If the +failure may be detected without a call to the C function, you may want to use +an INIT: section to report the failure. For failures detectable after the C +function returns one may want to use a CLEANUP: section to process the +failure. In more complicated cases use CODE: or PPCODE: sections. + +If many functions use the same failure indication based on the return value, +you may want to create a special typedef to handle this situation. Put + + typedef int negative_is_failure; + +near the beginning of XS file, and create an OUTPUT typemap entry +for C<negative_is_failure> which converts negative values to C<undef>, or +maybe croak()s. After this the return value of type C<negative_is_failure> +will create more Perl-like interface. Identify which values are used by only the C and XSUB functions -themselves. If Perl does not need to access the contents of the value +themselves, say, when a parameter to a function should be a contents of a +global variable. If Perl does not need to access the contents of the value then it may not be necessary to provide a translation for that value from C to Perl. Identify the pointers in the C function parameter lists and return -values. Some pointers can be handled in XS with the & unary operator on -the variable name while others will require the use of the * operator on -the type name. In general it is easier to work with the & operator. +values. Some pointers may be used to implement input/output or +output parameters, they can be handled in XS with the C<&> unary operator, +and, possibly, using the NO_INIT keyword. +Some others will require handling of types like C<int *>, and one needs +to decide what a useful Perl translation will do in such a case. 
When +the semantic is clear, it is advisable to put the translation into a typemap +file. Identify the structures used by the C functions. In many cases it may be helpful to use the T_PTROBJ typemap for these structures so they can be manipulated by Perl as -blessed objects. +blessed objects. (This is handled automatically by C<h2xs -x>.) + +If the same C type is used in several different contexts which require +different translations, C<typedef> several new types mapped to this C type, +and create separate F<typemap> entries for these new types. Use these +types in declarations of return type and parameters to XSUBs. =head2 Perl Objects And C Structures @@ -1188,7 +1474,7 @@ trim the name to the word DESTROY as Perl will expect. void rpcb_DESTROY(netconf) Netconfig *netconf - CODE: + CODE: printf("Now in NetconfigPtr::DESTROY\n"); free( netconf ); @@ -1214,8 +1500,8 @@ getnetconfigent() XSUB and an object created by a normal Perl subroutine. The typemap is a collection of code fragments which are used by the B<xsubpp> compiler to map C function parameters and values to Perl values. The typemap file may consist of three sections labeled C<TYPEMAP>, C<INPUT>, and -C<OUTPUT>. Any unlabelled initial section is assumed to be a C<TYPEMAP> -section if a name is not explicitly specified. The INPUT section tells +C<OUTPUT>. An unlabelled initial section is assumed to be a C<TYPEMAP> +section. The INPUT section tells the compiler how to translate Perl values into variables of certain C types. The OUTPUT section tells the compiler how to translate the values from certain C types into values Perl can @@ -1239,8 +1525,8 @@ with the T_PTROBJ typemap. The typemap used by getnetconfigent() is shown here. Note that the C type is separated from the XS type with a tab and that the C unary operator C<*> is considered to be a part of the C type name. 
- TYPEMAP - Netconfig *<tab>T_PTROBJ + TYPEMAP + Netconfig *<tab>T_PTROBJ Here's a more complicated example: suppose that you wanted C<struct netconfig> to be blessed into the class C<Net::Config>. One way to do @@ -1290,9 +1576,9 @@ File C<RPC.xs>: Interface to some ONC+ RPC bind library functions. SV * rpcb_gettime(host="localhost") char *host - PREINIT: + PREINIT: time_t timep; - CODE: + CODE: ST(0) = sv_newmortal(); if( rpcb_gettime( host, &timep ) ) sv_setnv( ST(0), (double)timep ); @@ -1306,7 +1592,7 @@ File C<RPC.xs>: Interface to some ONC+ RPC bind library functions. void rpcb_DESTROY(netconf) Netconfig *netconf - CODE: + CODE: printf("NetconfigPtr::DESTROY\n"); free( netconf ); @@ -1348,5 +1634,6 @@ This document covers features supported by C<xsubpp> 1.935. =head1 AUTHOR -Dean Roehrich <F<roehrich@cray.com>> -Jul 8, 1996 +Originally written by Dean Roehrich <F<roehrich@cray.com>>. + +Maintained since 1996 by The Perl Porters <F<perlbug@perl.com>>. diff --git a/contrib/perl5/pod/perlxstut.pod b/contrib/perl5/pod/perlxstut.pod index 69a1a25..d79f4b9 100644 --- a/contrib/perl5/pod/perlxstut.pod +++ b/contrib/perl5/pod/perlxstut.pod @@ -1,6 +1,6 @@ =head1 NAME -perlXStut - Tutorial for XSUBs +perlXStut - Tutorial for writing XSUBs =head1 DESCRIPTION @@ -10,61 +10,57 @@ L<perlxs>. This tutorial starts with very simple examples and becomes more complex, with each new example adding new features. Certain concepts may not be -completely explained until later in the tutorial to ease the -reader slowly into building extensions. +completely explained until later in the tutorial in order to slowly ease +the reader into building extensions. -=head2 VERSION CAVEAT +This tutorial was written from a Unix point of view. Where I know them +to be otherwise different for other platforms (e.g. Win32), I will list +them. If you find something that was missed, please let me know. -This tutorial tries hard to keep up with the latest development versions -of Perl. 
This often means that it is sometimes in advance of the latest -released version of Perl, and that certain features described here might -not work on earlier versions. This section will keep track of when various -features were added to Perl 5. +=head1 SPECIAL NOTES -=over 4 - -=item * - -In versions of Perl 5.002 prior to the gamma version, the test script -in Example 1 will not function properly. You need to change the "use -lib" line to read: - - use lib './blib'; - -=item * - -In versions of Perl 5.002 prior to version beta 3, the line in the .xs file -about "PROTOTYPES: DISABLE" will cause a compiler error. Simply remove that -line from the file. - -=item * - -In versions of Perl 5.002 prior to version 5.002b1h, the test.pl file was not -automatically created by h2xs. This means that you cannot say "make test" -to run the test script. You will need to add the following line before the -"use extension" statement: +=head2 make - use lib './blib'; +This tutorial assumes that the make program that Perl is configured to +use is called C<make>. Instead of running "make" in the examples that +follow, you may have to substitute whatever make program Perl has been +configured to use. Running B<perl -V:make> should tell you what it is. -=item * +=head2 Version caveat -In versions 5.000 and 5.001, instead of using the above line, you will need -to use the following line: +When writing a Perl extension for general consumption, one should expect that +the extension will be used with versions of Perl different from the +version available on your machine. Since you are reading this document, +the version of Perl on your machine is probably 5.005 or later, but the users +of your extension may have more ancient versions. 
- BEGIN { unshift(@INC, "./blib") } +To understand what kinds of incompatibilities one may expect, and in the rare +case that the version of Perl on your machine is older than this document, +see the section on "Troubleshooting these Examples" for more information. -=item * +If your extension uses some features of Perl which are not available on older +releases of Perl, your users would appreciate an early meaningful warning. +You would probably put this information into the F<README> file, but nowadays +installation of extensions may be performed automatically, guided by F<CPAN.pm> +module or other tools. -This document assumes that the executable named "perl" is Perl version 5. -Some systems may have installed Perl version 5 as "perl5". +In MakeMaker-based installations, F<Makefile.PL> provides the earliest +opportunity to perform version checks. One can put something like this +in F<Makefile.PL> for this purpose: -=back + eval { require 5.007 } + or die <<EOD; + ############ + ### This module uses frobnication framework which is not available before + ### version 5.007 of Perl. Upgrade your Perl before installing Kara::Mba. + ############ + EOD -=head2 DYNAMIC VERSUS STATIC +=head2 Dynamic Loading versus Static Loading It is commonly thought that if a system does not have the capability to -load a library dynamically, you cannot build XSUBs. This is incorrect. -You I<can> build them, but you must link the XSUB's subroutines with the +dynamically load a library, you cannot build XSUBs. This is incorrect. +You I<can> build them, but you must link the XSUBs subroutines with the rest of Perl, creating a new executable. This situation is similar to Perl 4. @@ -75,25 +71,30 @@ executable with that static library linked in. Should you wish to build a statically-linked executable on a system which can dynamically load libraries, you may, in all the following examples, -where the command "make" with no arguments is executed, run the command -"make perl" instead. 
+where the command "C<make>" with no arguments is executed, run the command +"C<make perl>" instead. If you have generated such a statically-linked executable by choice, then -instead of saying "make test", you should say "make test_static". On systems -that cannot build dynamically-loadable libraries at all, simply saying "make -test" is sufficient. +instead of saying "C<make test>", you should say "C<make test_static>". +On systems that cannot build dynamically-loadable libraries at all, simply +saying "C<make test>" is sufficient. + +=head1 TUTORIAL + +Now let's go on with the show! =head2 EXAMPLE 1 Our first extension will be very simple. When we call the routine in the extension, it will print out a well-known message and return. -Run C<h2xs -A -n Mytest>. This creates a directory named Mytest, possibly under -ext/ if that directory exists in the current working directory. Several files -will be created in the Mytest dir, including MANIFEST, Makefile.PL, Mytest.pm, -Mytest.xs, test.pl, and Changes. +Run "C<h2xs -A -n Mytest>". This creates a directory named Mytest, +possibly under ext/ if that directory exists in the current working +directory. Several files will be created in the Mytest dir, including +MANIFEST, Makefile.PL, Mytest.pm, Mytest.xs, test.pl, and Changes. -The MANIFEST file contains the names of all the files created. +The MANIFEST file contains the names of all the files just created in the +Mytest directory. The file Makefile.PL should look something like this: @@ -101,28 +102,31 @@ The file Makefile.PL should look something like this: # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. 
WriteMakefile( - 'NAME' => 'Mytest', - 'VERSION_FROM' => 'Mytest.pm', # finds $VERSION - 'LIBS' => [''], # e.g., '-lm' - 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' - 'INC' => '', # e.g., '-I/usr/include/other' + NAME => 'Mytest', + VERSION_FROM => 'Mytest.pm', # finds $VERSION + LIBS => [''], # e.g., '-lm' + DEFINE => '', # e.g., '-DHAVE_SOMETHING' + INC => '', # e.g., '-I/usr/include/other' ); The file Mytest.pm should start with something like this: package Mytest; + use strict; + use warnings; + require Exporter; require DynaLoader; - @ISA = qw(Exporter DynaLoader); + our @ISA = qw(Exporter DynaLoader); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. - @EXPORT = qw( + our @EXPORT = qw( ); - $VERSION = '0.01'; + our $VERSION = '0.01'; bootstrap Mytest $VERSION; @@ -134,19 +138,14 @@ The file Mytest.pm should start with something like this: __END__ # Below is the stub of documentation for your module. You better edit it! -And the Mytest.xs file should look something like this: +The rest of the .pm file contains sample code for providing documentation for +the extension. + +Finally, the Mytest.xs file should look something like this: - #ifdef __cplusplus - extern "C" { - #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" - #ifdef __cplusplus - } - #endif - - PROTOTYPES: DISABLE MODULE = Mytest PACKAGE = Mytest @@ -154,10 +153,14 @@ Let's edit the .xs file by adding this to the end of the file: void hello() - CODE: + CODE: printf("Hello, world!\n"); -Now we'll run "perl Makefile.PL". This will create a real Makefile, +It is okay for the lines starting at the "CODE:" line to not be indented. +However, for readability purposes, it is suggested that you indent CODE: +one level and the lines following one more level. + +Now we'll run "C<perl Makefile.PL>". 
This will create a real Makefile, which make needs. Its output looks something like: % perl Makefile.PL @@ -166,12 +169,14 @@ which make needs. Its output looks something like: Writing Makefile for Mytest % -Now, running make will produce output that looks something like this -(some long lines shortened for clarity): +Now, running make will produce output that looks something like this (some +long lines have been shortened for clarity and some extraneous lines have +been deleted): % make umask 0 && cp Mytest.pm ./blib/Mytest.pm perl xsubpp -typemap typemap Mytest.xs >Mytest.tc && mv Mytest.tc Mytest.c + Please specify prototyping behavior for Mytest.xs (see perlxs manual) cc -c Mytest.c Running Mkbootstrap for Mytest () chmod 644 Mytest.bs @@ -179,9 +184,21 @@ Now, running make will produce output that looks something like this chmod 755 ./blib/PA-RISC1.1/auto/Mytest/Mytest.sl cp Mytest.bs ./blib/PA-RISC1.1/auto/Mytest/Mytest.bs chmod 644 ./blib/PA-RISC1.1/auto/Mytest/Mytest.bs + Manifying ./blib/man3/Mytest.3 + % + +You can safely ignore the line about "prototyping behavior". + +If you are on a Win32 system, and the build process fails with linker +errors for functions in the C library, check if your Perl is configured +to use PerlCRT (running B<perl -V:libc> should show you if this is the +case). If Perl is configured to use PerlCRT, you have to make sure +PerlCRT.lib is copied to the same location that msvcrt.lib lives in, +so that the compiler can find it on its own. msvcrt.lib is usually +found in the Visual C compiler's lib directory (e.g. C:/DevStudio/VC/lib). -Now, although there is already a test.pl template ready for us, for this -example only, we'll create a special test script. Create a file called hello +Perl has its own special way of easily writing test scripts, but for this +example only, we'll create our own test script. Create a file called hello that looks like this: #! 
/opt/perl5/bin/perl @@ -192,42 +209,40 @@ that looks like this: Mytest::hello(); -Now we run the script and we should see the following output: +Now we make the script executable (C<chmod +x hello>), run the script +and we should see the following output: - % perl hello + % ./hello Hello, world! % =head2 EXAMPLE 2 -Now let's add to our extension a subroutine that will take a single argument -and return 1 if the argument is even, 0 if the argument is odd. +Now let's add to our extension a subroutine that will take a single numeric +argument as input and return 1 if the number is even or 0 if the number +is odd. Add the following to the end of Mytest.xs: int is_even(input) int input - CODE: + CODE: RETVAL = (input % 2 == 0); - OUTPUT: + OUTPUT: RETVAL -There does not need to be white space at the start of the "int input" line, -but it is useful for improving readability. The semi-colon at the end of -that line is also optional. +There does not need to be white space at the start of the "C<int input>" +line, but it is useful for improving readability. Placing a semi-colon at +the end of that line is also optional. Any amount and kind of white space +may be placed between the "C<int>" and "C<input>". -Any white space may be between the "int" and "input". It is also okay for -the four lines starting at the "CODE:" line to not be indented. However, -for readability purposes, it is suggested that you indent them 8 spaces -(or one normal tab stop). - -Now rerun make to rebuild our new shared library. +Now re-run make to rebuild our new shared library. Now perform the same steps as before, generating a Makefile from the Makefile.PL file, and running make. -To test that our extension works, we now need to look at the +In order to test that our extension works, we now need to look at the file test.pl. This file is set up to imitate the same kind of testing structure that Perl itself has. 
Within the test script, you perform a number of tests to confirm the behavior of the extension, printing "ok" @@ -239,11 +254,11 @@ to the end of the file: print &Mytest::is_even(1) == 0 ? "ok 3" : "not ok 3", "\n"; print &Mytest::is_even(2) == 1 ? "ok 4" : "not ok 4", "\n"; -We will be calling the test script through the command "make test". You +We will be calling the test script through the command "C<make test>". You should see output that looks something like this: % make test - PERL_DL_NONLAZY=1 /opt/perl5.002b2/bin/perl (lots of -I arguments) test.pl + PERL_DL_NONLAZY=1 /opt/perl5.004/bin/perl (lots of -I arguments) test.pl 1..4 ok 1 ok 2 @@ -251,7 +266,7 @@ should see output that looks something like this: ok 4 % -=head2 WHAT HAS GONE ON? +=head2 What has gone on? The program h2xs is the starting point for creating extensions. In later examples we'll see how we can use h2xs to read header files and generate @@ -261,71 +276,72 @@ h2xs creates a number of files in the extension directory. The file Makefile.PL is a perl script which will generate a true Makefile to build the extension. We'll take a closer look at it later. -The files E<lt>extensionE<gt>.pm and E<lt>extensionE<gt>.xs contain the meat -of the extension. -The .xs file holds the C routines that make up the extension. The .pm file -contains routines that tell Perl how to load your extension. - -Generating and invoking the Makefile created a directory blib (which stands -for "build library") in the current working directory. This directory will -contain the shared library that we will build. Once we have tested it, we -can install it into its final location. - -Invoking the test script via "make test" did something very important. It -invoked perl with all those C<-I> arguments so that it could find the various -files that are part of the extension. - -It is I<very> important that while you are still testing extensions that -you use "make test". 
If you try to run the test script all by itself, you -will get a fatal error. - -Another reason it is important to use "make test" to run your test script -is that if you are testing an upgrade to an already-existing version, using -"make test" insures that you use your new extension, not the already-existing -version. +The .pm and .xs files contain the meat of the extension. The .xs file holds +the C routines that make up the extension. The .pm file contains routines +that tell Perl how to load your extension. + +Generating the Makefile and running C<make> created a directory called blib +(which stands for "build library") in the current working directory. This +directory will contain the shared library that we will build. Once we have +tested it, we can install it into its final location. + +Invoking the test script via "C<make test>" did something very important. +It invoked perl with all those C<-I> arguments so that it could find the +various files that are part of the extension. It is I<very> important that, +while you are still testing extensions, you use "C<make test>". If you +try to run the test script all by itself, you will get a fatal error. +Another reason it is important to use "C<make test>" to run your test +script is that if you are testing an upgrade to an already-existing version, +using "C<make test>" ensures that you will test your new extension, not the +already-existing version. When Perl sees a C<use extension;>, it searches for a file with the same name -as the use'd extension that has a .pm suffix. If that file cannot be found, +as the C<use>'d extension that has a .pm suffix. If that file cannot be found, Perl dies with a fatal error. The default search path is contained in the -@INC array. +C<@INC> array. In our case, Mytest.pm tells perl that it will need the Exporter and Dynamic -Loader extensions. It then sets the @ISA and @EXPORT arrays and the $VERSION -scalar; finally it tells perl to bootstrap the module. 
Perl will call its -dynamic loader routine (if there is one) and load the shared library. +Loader extensions. It then sets the C<@ISA> and C<@EXPORT> arrays and the +C<$VERSION> scalar; finally it tells perl to bootstrap the module. Perl +will call its dynamic loader routine (if there is one) and load the shared +library. -The two arrays that are set in the .pm file are very important. The @ISA +The two arrays C<@ISA> and C<@EXPORT> are very important. The C<@ISA> array contains a list of other packages in which to search for methods (or -subroutines) that do not exist in the current package. The @EXPORT array -tells Perl which of the extension's routines should be placed into the -calling package's namespace. +subroutines) that do not exist in the current package. This is usually +only important for object-oriented extensions (which we will talk about +much later), and so usually doesn't need to be modified. -It's important to select what to export carefully. Do NOT export method names -and do NOT export anything else I<by default> without a good reason. +The C<@EXPORT> array tells Perl which of the extension's variables and +subroutines should be placed into the calling package's namespace. Because +you don't know if the user has already used your variable and subroutine +names, it's vitally important to carefully select what to export. Do I<not> +export method or variable names I<by default> without a good reason. As a general rule, if the module is trying to be object-oriented then don't -export anything. If it's just a collection of functions then you can export -any of the functions via another array, called @EXPORT_OK. +export anything. If it's just a collection of functions and variables, then +you can export them via another array, called C<@EXPORT_OK>. This array +does not automatically place its subroutine and variable names into the +namespace unless the user specifically requests that this be done. See L<perlmod> for more information. 
-The $VERSION variable is used to ensure that the .pm file and the shared +The C<$VERSION> variable is used to ensure that the .pm file and the shared library are "in sync" with each other. Any time you make changes to the .pm or .xs files, you should increment the value of this variable. -=head2 WRITING GOOD TEST SCRIPTS +=head2 Writing good test scripts The importance of writing good test scripts cannot be overemphasized. You should closely follow the "ok/not ok" style that Perl itself uses, so that it is very easy and unambiguous to determine the outcome of each test case. When you find and fix a bug, make sure you add a test case for it. -By running "make test", you ensure that your test.pl script runs and uses +By running "C<make test>", you ensure that your test.pl script runs and uses the correct version of your extension. If you have many test cases, you -might want to copy Perl's test style. Create a directory named "t", and -ensure all your test files end with the suffix ".t". The Makefile will -properly run all these test files. - +might want to copy Perl's test style. Create a directory named "t" in the +extension's directory and append the suffix ".t" to the names of your test +files. When you run "C<make test>", all of these test files will be executed. =head2 EXAMPLE 3 @@ -337,7 +353,7 @@ Add the following to the end of Mytest.xs: void round(arg) double arg - CODE: + CODE: if (arg > 0.0) { arg = floor(arg + 0.5); } else if (arg < 0.0) { @@ -345,14 +361,14 @@ Add the following to the end of Mytest.xs: } else { arg = 0.0; } - OUTPUT: + OUTPUT: arg Edit the Makefile.PL file so that the corresponding line looks like this: 'LIBS' => ['-lm'], # e.g., '-lm' -Generate the Makefile and run make. Change the BEGIN block to print out +Generate the Makefile and run make. Change the BEGIN block to print "1..9" and add the following to test.pl: $i = -1.5; &Mytest::round($i); print $i == -2.0 ? 
"ok 5" : "not ok 5", "\n"; @@ -361,58 +377,70 @@ Generate the Makefile and run make. Change the BEGIN block to print out $i = 0.5; &Mytest::round($i); print $i == 1.0 ? "ok 8" : "not ok 8", "\n"; $i = 1.2; &Mytest::round($i); print $i == 1.0 ? "ok 9" : "not ok 9", "\n"; -Running "make test" should now print out that all nine tests are okay. +Running "C<make test>" should now print out that all nine tests are okay. -You might be wondering if you can round a constant. To see what happens, add -the following line to test.pl temporarily: +Notice that in these new test cases, the argument passed to round was a +scalar variable. You might be wondering if you can round a constant or +literal. To see what happens, temporarily add the following line to test.pl: &Mytest::round(3); -Run "make test" and notice that Perl dies with a fatal error. Perl won't let -you change the value of constants! +Run "C<make test>" and notice that Perl dies with a fatal error. Perl won't +let you change the value of constants! + +=head2 What's new here? + +=over 4 + +=item * + +We've made some changes to Makefile.PL. In this case, we've specified an +extra library to be linked into the extension's shared library, the math +library libm in this case. We'll talk later about how to write XSUBs that +can call every routine in a library. -=head2 WHAT'S NEW HERE? +=item * -Two things are new here. First, we've made some changes to Makefile.PL. -In this case, we've specified an extra library to link in, the math library -libm. We'll talk later about how to write XSUBs that can call every routine -in a library. +The value of the function is not being passed back as the function's return +value, but by changing the value of the variable that was passed into the +function. You might have guessed that when you saw that the return value +of round is of type "void". 
-Second, the value of the function is being passed back not as the function's -return value, but through the same variable that was passed into the function. +=back -=head2 INPUT AND OUTPUT PARAMETERS +=head2 Input and Output Parameters -You specify the parameters that will be passed into the XSUB just after you -declare the function return value and name. Each parameter line starts with -optional white space, and may have an optional terminating semicolon. +You specify the parameters that will be passed into the XSUB on the line(s) +after you declare the function's return value and name. Each input parameter +line starts with optional white space, and may have an optional terminating +semicolon. -The list of output parameters occurs after the OUTPUT: directive. The use -of RETVAL tells Perl that you wish to send this value back as the return -value of the XSUB function. In Example 3, the value we wanted returned was -contained in the same variable we passed in, so we listed it (and not RETVAL) -in the OUTPUT: section. +The list of output parameters occurs at the very end of the function, just +after the OUTPUT: directive. The use of RETVAL tells Perl that you +wish to send this value back as the return value of the XSUB function. In +Example 3, we wanted the "return value" placed in the original variable +which we passed in, so we listed it (and not RETVAL) in the OUTPUT: section. -=head2 THE XSUBPP COMPILER +=head2 The XSUBPP Program -The compiler xsubpp takes the XS code in the .xs file and converts it into +The B<xsubpp> program takes the XS code in the .xs file and translates it into C code, placing it in a file whose suffix is .c. The C code created makes heavy use of the C functions within Perl. -=head2 THE TYPEMAP FILE +=head2 The TYPEMAP file -The xsubpp compiler uses rules to convert from Perl's data types (scalar, -array, etc.) to C's data types (int, char *, etc.). 
These rules are stored +The B<xsubpp> program uses rules to convert from Perl's data types (scalar, +array, etc.) to C's data types (int, char, etc.). These rules are stored in the typemap file ($PERLLIB/ExtUtils/typemap). This file is split into three parts. -The first part attempts to map various C data types to a coded flag, which -has some correspondence with the various Perl types. The second part contains -C code which xsubpp uses for input parameters. The third part contains C -code which xsubpp uses for output parameters. We'll talk more about the -C code later. +The first section maps various C data types to a name, which corresponds +somewhat with the various Perl types. The second section contains C code +which B<xsubpp> uses to handle input parameters. The third section contains +C code which B<xsubpp> uses to handle output parameters. -Let's now take a look at a portion of the .c file created for our extension. +Let's take a look at a portion of the .c file created for our extension. +The file name is Mytest.c: XS(XS_Mytest_round) { @@ -428,13 +456,13 @@ Let's now take a look at a portion of the .c file created for our extension. } else { arg = 0.0; } - sv_setnv(ST(0), (double)arg); /* XXXXX */ + sv_setnv(ST(0), (double)arg); /* XXXXX */ } XSRETURN(1); } -Notice the two lines marked with "XXXXX". If you check the first section of -the typemap file, you'll see that doubles are of type T_DOUBLE. In the +Notice the two lines commented with "XXXXX". If you check the first section +of the typemap file, you'll see that doubles are of type T_DOUBLE. In the INPUT section, an argument that is T_DOUBLE is assigned to the variable arg by calling the routine SvNV on something, then casting it to double, then assigned to the variable arg. Similarly, in the OUTPUT section, @@ -443,17 +471,19 @@ be passed back to the calling subroutine. 
These two functions are explained in L<perlguts>; we'll talk more later about what that "ST(0)" means in the section on the argument stack. -=head2 WARNING +=head2 Warning about Output Arguments In general, it's not a good idea to write extensions that modify their input -parameters, as in Example 3. However, to accommodate better calling -pre-existing C routines, which often do modify their input parameters, -this behavior is tolerated. The next example will show how to do this. +parameters, as in Example 3. Instead, you should probably return multiple +values in an array and let the caller handle them (we'll do this in a later +example). However, in order to better accommodate calling pre-existing C +routines, which often do modify their input parameters, this behavior is +tolerated. =head2 EXAMPLE 4 In this example, we'll now begin to write XSUBs that will interact with -predefined C libraries. To begin with, we will build a small library of +pre-defined C libraries. To begin with, we will build a small library of our own, then let h2xs write our .pm and .xs files for us. 
Create a new directory called Mytest2 at the same level as the directory @@ -477,10 +507,7 @@ Also create a file mylib.c that looks like this: #include "./mylib.h" double - foo(a, b, c) - int a; - long b; - const char * c; + foo(int a, long b, const char *c) { return (a + b + atof(c) + TESTVAL); } @@ -490,9 +517,9 @@ And finally create a file Makefile.PL that looks like this: use ExtUtils::MakeMaker; $Verbose = 1; WriteMakefile( - NAME => 'Mytest2::mylib', - SKIP => [qw(all static static_lib dynamic dynamic_lib)], - clean => {'FILES' => 'libmylib$(LIB_EXT)'}, + NAME => 'Mytest2::mylib', + SKIP => [qw(all static static_lib dynamic dynamic_lib)], + clean => {'FILES' => 'libmylib$(LIB_EXT)'}, ); @@ -500,6 +527,8 @@ And finally create a file Makefile.PL that looks like this: ' all :: static + pure_all :: static + static :: libmylib$(LIB_EXT) libmylib$(LIB_EXT): $(O_FILES) @@ -509,6 +538,11 @@ And finally create a file Makefile.PL that looks like this: '; } +Make sure you use a tab and not spaces on the lines beginning with "$(AR)" +and "$(RANLIB)". Make will not function properly if you use spaces. +It has also been reported that the "cr" argument to $(AR) is unnecessary +on Win32 systems. + We will now create the main top-level Mytest2 files. Change to the directory above Mytest2 and run the following command: @@ -519,24 +553,29 @@ Our files are stored in Mytest2/mylib, and will be untouched. The normal Makefile.PL that h2xs generates doesn't know about the mylib directory. We need to tell it that there is a subdirectory and that we -will be generating a library in it. 
Let's add the argument MYEXTLIB to +the WriteMakefile call so that it looks like this: - 'MYEXTLIB' => 'mylib/libmylib$(LIB_EXT)', + WriteMakefile( + 'NAME' => 'Mytest2', + 'VERSION_FROM' => 'Mytest2.pm', # finds $VERSION + 'LIBS' => [''], # e.g., '-lm' + 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' + 'INC' => '', # e.g., '-I/usr/include/other' + 'MYEXTLIB' => 'mylib/libmylib$(LIB_EXT)', + ); -and a new replacement subroutine too: +and then at the end add a subroutine (which will override the pre-existing +subroutine). Remember to use a tab character to indent the line beginning +with "cd"! sub MY::postamble { ' $(MYEXTLIB): mylib/Makefile - cd mylib && $(MAKE) $(PASTHRU) + cd mylib && $(MAKE) $(PASSTHRU) '; } -(Note: Most makes will require that there be a tab character that indents -the line C<cd mylib && $(MAKE) $(PASTHRU)>, similarly for the Makefile in the -subdirectory.) - Let's also fix the MANIFEST file so that it accurately reflects the contents of our extension. The single line that says "mylib" should be replaced by the following three lines: @@ -546,8 +585,7 @@ the following three lines: mylib/mylib.h To keep our namespace nice and unpolluted, edit the .pm file and change -the lines setting @EXPORT to @EXPORT_OK (there are two: one in the line -beginning "use vars" and one setting the array itself). Finally, in the +the variable C<@EXPORT> to C<@EXPORT_OK>. Finally, in the .xs file, edit the #include line to read: #include "mylib/mylib.h" @@ -559,17 +597,17 @@ And also add the following function definition to the end of the .xs file: int a long b const char * c - OUTPUT: + OUTPUT: RETVAL Now we also need to create a typemap file because the default Perl doesn't -currently support the const char * type. Create a file called typemap and -place the following in it: +currently support the const char * type. Create a file called typemap in +the Mytest2 directory and place the following in it: const char * T_PV Now run perl on the top-level Makefile.PL. 
Notice that it also created a -Makefile in the mylib directory. Run make and see that it does cd into +Makefile in the mylib directory. Run make and watch that it does cd into the mylib directory and run make in there as well. Now edit the test.pl script and change the BEGIN block to print "1..4", @@ -579,13 +617,13 @@ and add the following lines to the end of the script: print &Mytest2::foo(1, 2, "0.0") == 7 ? "ok 3\n" : "not ok 3\n"; print abs(&Mytest2::foo(0, 0, "-3.4") - 0.6) <= 0.01 ? "ok 4\n" : "not ok 4\n"; -(When dealing with floating-point comparisons, it is often useful not to check -for equality, but rather the difference being below a certain epsilon factor, -0.01 in this case) +(When dealing with floating-point comparisons, it is best to not check for +equality, but rather that the difference between the expected and actual +result is below a certain amount (called epsilon) which is 0.01 in this case) -Run "make test" and all should be well. +Run "C<make test>" and all should be well. -=head2 WHAT HAS HAPPENED HERE? +=head2 What has happened here? Unlike previous examples, we've now run h2xs on a real include file. This has caused some extra goodies to appear in both the .pm and .xs files. @@ -594,76 +632,214 @@ has caused some extra goodies to appear in both the .pm and .xs files. =item * -In the .xs file, there's now a #include declaration with the full path to -the mylib.h header file. +In the .xs file, there's now a #include directive with the absolute path to +the mylib.h header file. We changed this to a relative path so that we +could move the extension directory if we wanted to. =item * There's now some new C code that's been added to the .xs file. The purpose of the C<constant> routine is to make the values that are #define'd in the -header file available to the Perl script (in this case, by calling -C<&main::TESTVAL>). 
There's also some XS code to allow calls to the +header file accessible by the Perl script (by calling either C<TESTVAL> or +C<&Mytest2::TESTVAL>). There's also some XS code to allow calls to the C<constant> routine. =item * -The .pm file has exported the name TESTVAL in the @EXPORT array. This -could lead to name clashes. A good rule of thumb is that if the #define -is going to be used by only the C routines themselves, and not by the user, -they should be removed from the @EXPORT array. Alternately, if you don't -mind using the "fully qualified name" of a variable, you could remove most -or all of the items in the @EXPORT array. +The .pm file originally exported the name C<TESTVAL> in the C<@EXPORT> array. +This could lead to name clashes. A good rule of thumb is that if the #define +is only going to be used by the C routines themselves, and not by the user, +they should be removed from the C<@EXPORT> array. Alternately, if you don't +mind using the "fully qualified name" of a variable, you could move most +or all of the items from the C<@EXPORT> array into the C<@EXPORT_OK> array. =item * -If our include file contained #include directives, these would not be -processed at all by h2xs. There is no good solution to this right now. +If our include file had contained #include directives, these would not have +been processed by h2xs. There is no good solution to this right now. -=back +=item * We've also told Perl about the library that we built in the mylib -subdirectory. That required the addition of only the MYEXTLIB variable +subdirectory. That required only the addition of the C<MYEXTLIB> variable to the WriteMakefile call and the replacement of the postamble subroutine to cd into the subdirectory and run make. The Makefile.PL for the library is a bit more complicated, but not excessively so. Again we replaced the postamble subroutine to insert our own code. 
This code -specified simply that the library to be created here was a static -archive (as opposed to a dynamically loadable library) and provided the +simply specified that the library to be created here was a static archive +library (as opposed to a dynamically loadable library) and provided the commands to build it. -=head2 SPECIFYING ARGUMENTS TO XSUBPP +=back + +=head2 Anatomy of .xs file + +The .xs file of L<"EXAMPLE 4"> contained some new elements. To understand +the meaning of these elements, pay attention to the line which reads + + MODULE = Mytest2 PACKAGE = Mytest2 + +Anything before this line is plain C code which describes which headers +to include, and defines some convenience functions. No translations are +performed on this part; it goes into the generated output C file as is. + +Anything after this line is the description of XSUB functions. +These descriptions are translated by B<xsubpp> into C code which +implements these functions using Perl calling conventions, and which +makes these functions visible from the Perl interpreter. + +Pay special attention to the function C<constant>. This name appears +twice in the generated .xs file: once in the first part, as a static C +function, then another time in the second part, when an XSUB interface to +this static C function is defined. + +This is quite typical for .xs files: usually the .xs file provides +an interface to an existing C function. Then this C function is defined +somewhere (either in an external library, or in the first part of .xs file), +and a Perl interface to this function (i.e. "Perl glue") is described in the +second part of .xs file. The situation in L<"EXAMPLE 1">, L<"EXAMPLE 2">, +and L<"EXAMPLE 3">, when all the work is done inside the "Perl glue", is +somewhat of an exception rather than the rule. 
+ +=head2 Getting the fat out of XSUBs + +In L<"EXAMPLE 4"> the second part of .xs file contained the following +description of an XSUB: + + double + foo(a,b,c) + int a + long b + const char * c + OUTPUT: + RETVAL + +Note that in contrast with L<"EXAMPLE 1">, L<"EXAMPLE 2"> and L<"EXAMPLE 3">, +this description does not contain the actual I<code> for what is done +during a call to the Perl function foo(). To understand what is going +on here, one can add a CODE section to this XSUB: + + double + foo(a,b,c) + int a + long b + const char * c + CODE: + RETVAL = foo(a,b,c); + OUTPUT: + RETVAL + +However, these two XSUBs provide almost identical generated C code: the B<xsubpp> +compiler is smart enough to figure out the C<CODE:> section from the first +two lines of the description of XSUB. What about C<OUTPUT:> section? In +fact, that is absolutely the same! The C<OUTPUT:> section can be removed +as well, I<as long as C<CODE:> section or C<PPCODE:> section> is not +specified: B<xsubpp> can see that it needs to generate a function call +section, and will autogenerate the OUTPUT section too. Thus one can +shortcut the XSUB to become: + + double + foo(a,b,c) + int a + long b + const char * c + +Can we do the same with an XSUB + + int + is_even(input) + int input + CODE: + RETVAL = (input % 2 == 0); + OUTPUT: + RETVAL + +of L<"EXAMPLE 2">? To do this, one needs to define a C function C<int +is_even(int input)>. As we saw in L<Anatomy of .xs file>, a proper place +for this definition is in the first part of .xs file. In fact a C function + + int + is_even(int arg) + { + return (arg % 2 == 0); + } + +is probably overkill for this. 
Something as simple as a C<#define> will +do too: + + #define is_even(arg) ((arg) % 2 == 0) + +After having this in the first part of .xs file, the "Perl glue" part becomes +as simple as + + int + is_even(input) + int input + +This technique of separation of the glue part from the workhorse part has +obvious tradeoffs: if you want to change a Perl interface, you need to +change two places in your code. However, it removes a lot of clutter, +and makes the workhorse part independent from idiosyncrasies of Perl calling +convention. (In fact, there is nothing Perl-specific in the above description, +a different version of B<xsubpp> might have translated this to TCL glue or +Python glue as well.) + +=head2 More about XSUB arguments With the completion of Example 4, we now have an easy way to simulate some real-life libraries whose interfaces may not be the cleanest in the world. We shall now continue with a discussion of the arguments passed to the -xsubpp compiler. +B<xsubpp> compiler. + +When you specify arguments to routines in the .xs file, you are really +passing three pieces of information for each argument listed. The first +piece is the order of that argument relative to the others (first, second, +etc). The second is the type of argument, and consists of the type +declaration of the argument (e.g., int, char*, etc). The third piece is +the calling convention for the argument in the call to the library function. + +While Perl passes arguments to functions by reference, +C passes arguments by value; to implement a C function which modifies data +of one of the "arguments", the actual argument of this C function would be +a pointer to the data. Thus two C functions with declarations -When you specify arguments in the .xs file, you are really passing three -pieces of information for each one listed. The first piece is the order -of that argument relative to the others (first, second, etc). 
The second -is the type of argument, and consists of the type declaration of the -argument (e.g., int, char*, etc). The third piece is the exact way in -which the argument should be used in the call to the library function -from this XSUB. This would mean whether or not to place a "&" before -the argument or not, meaning the argument expects to be passed the address -of the specified data type. + int string_length(char *s); + int upper_case_char(char *cp); -There is a difference between the two arguments in this hypothetical function: +may have completely different semantics: the first one may inspect an array +of chars pointed to by s, and the second one may immediately dereference C<cp> +and manipulate C<*cp> only (using the return value as, say, a success +indicator). From Perl one would use these functions in +a completely different manner. + +One conveys this info to B<xsubpp> by replacing C<*> before the +argument by C<&>. C<&> means that the argument should be passed to a library +function by its address. The above two functions may be XSUB-ified as + + int + string_length(s) + char * s + + int + upper_case_char(cp) + char &cp + +For example, consider: int foo(a,b) char &a char * b -The first argument to this function would be treated as a char and assigned +The first Perl argument to this function would be treated as a char and assigned to the variable a, and its address would be passed into the function foo. -The second argument would be treated as a string pointer and assigned to the +The second Perl argument would be treated as a string pointer and assigned to the variable b. The I<value> of b would be passed into the function foo. 
The -actual call to the function foo that xsubpp generates would look like this: +actual call to the function foo that B<xsubpp> generates would look like this: foo(&a, b); -Xsubpp will identically parse the following function argument lists: +B<xsubpp> will parse the following function argument lists identically: char &a char&a @@ -672,40 +848,72 @@ Xsubpp will identically parse the following function argument lists: However, to help ease understanding, it is suggested that you place a "&" next to the variable name and away from the variable type), and place a "*" near the variable type, but away from the variable name (as in the -complete example above). By doing so, it is easy to understand exactly -what will be passed to the C function -- it will be whatever is in the -"last column". +call to foo above). By doing so, it is easy to understand exactly what +will be passed to the C function -- it will be whatever is in the "last +column". You should take great pains to try to pass the function the type of variable it wants, when possible. It will save you a lot of trouble in the long run. -=head2 THE ARGUMENT STACK +=head2 The Argument Stack If we look at any of the C code generated by any of the examples except example 1, you will notice a number of references to ST(n), where n is -usually 0. The "ST" is actually a macro that points to the n'th argument -on the argument stack. ST(0) is thus the first argument passed to the -XSUB, ST(1) is the second argument, and so on. +usually 0. "ST" is actually a macro that points to the n'th argument +on the argument stack. ST(0) is thus the first argument on the stack and +therefore the first argument passed to the XSUB, ST(1) is the second +argument, and so on. 
-When you list the arguments to the XSUB in the .xs file, that tells xsubpp +When you list the arguments to the XSUB in the .xs file, that tells B<xsubpp> which argument corresponds to which of the argument stack (i.e., the first one listed is the first argument, and so on). You invite disaster if you do not list them in the same order as the function expects them. -=head2 EXTENDING YOUR EXTENSION +The actual values on the argument stack are pointers to the values passed +in. When an argument is listed as being an OUTPUT value, its corresponding +value on the stack (i.e., ST(0) if it was the first argument) is changed. +You can verify this by looking at the C code generated for Example 3. +The code for the round() XSUB routine contains lines that look like this: + + double arg = (double)SvNV(ST(0)); + /* Round the contents of the variable arg */ + sv_setnv(ST(0), (double)arg); + +The arg variable is initially set by taking the value from ST(0), then is +stored back into ST(0) at the end of the routine. + +XSUBs are also allowed to return lists, not just scalars. This must be +done by manipulating stack values ST(0), ST(1), etc, in a subtly +different way. See L<perlxs> for details. + +XSUBs are also allowed to avoid automatic conversion of Perl function arguments +to C function arguments. See L<perlxs> for details. Some people prefer +manual conversion by inspecting C<ST(i)> even in the cases when automatic +conversion will do, arguing that this makes the logic of an XSUB call clearer. +Compare with L<"Getting the fat out of XSUBs"> for a similar tradeoff of +a complete separation of "Perl glue" and "workhorse" parts of an XSUB. + +While experts may argue about these idioms, a novice to Perl guts may +prefer a way which is as little Perl-guts-specific as possible, meaning +automatic conversion and automatic call generation, as in +L<"Getting the fat out of XSUBs">. 
This approach has the additional +benefit of protecting the XSUB writer from future changes to the Perl API. + +=head2 Extending your Extension Sometimes you might want to provide some extra methods or subroutines to assist in making the interface between Perl and your extension simpler or easier to understand. These routines should live in the .pm file. Whether they are automatically loaded when the extension itself is loaded -or loaded only when called depends on where in the .pm file the subroutine -definition is placed. +or only loaded when called depends on where in the .pm file the subroutine +definition is placed. You can also consult L<AutoLoader> for an alternate +way to store and load your extra subroutines. -=head2 DOCUMENTING YOUR EXTENSION +=head2 Documenting your Extension There is absolutely no excuse for not documenting your extension. Documentation belongs in the .pm file. This file will be fed to pod2man, -and the embedded documentation will be converted to the manpage format, +and the embedded documentation will be converted to the man page format, then placed in the blib directory. It will be copied to Perl's man page directory when the extension is installed. @@ -715,19 +923,199 @@ as the comment inside the .pm file explains. See L<perlpod> for more information about the pod format. -=head2 INSTALLING YOUR EXTENSION +=head2 Installing your Extension Once your extension is complete and passes all its tests, installing it -is quite simple: you simply run "make install". You will either need +is quite simple: you simply run "make install". You will either need to have write permission into the directories where Perl is installed, or ask your system administrator to run the make for you. -=head2 SEE ALSO +Alternately, you can specify the exact directory to place the extension's +files by placing a "PREFIX=/destination/directory" after the make install. +(or in between the make and install if you have a brain-dead version of make). 
+This can be very useful if you are building an extension that will eventually +be distributed to multiple systems. You can then just archive the files in +the destination directory and distribute them to your destination systems. + +=head2 EXAMPLE 5 + +In this example, we'll do some more work with the argument stack. The +previous examples have all returned only a single value. We'll now +create an extension that returns an array. + +This extension is very Unix-oriented (struct statfs and the statfs system +call). If you are not running on a Unix system, you can substitute for +statfs any other function that returns multiple values, you can hard-code +values to be returned to the caller (although this will be a bit harder +to test the error case), or you can simply not do this example. If you +change the XSUB, be sure to fix the test cases to match the changes. + +Return to the Mytest directory and add the following code to the end of +Mytest.xs: + + void + statfs(path) + char * path + INIT: + int i; + struct statfs buf; + + PPCODE: + i = statfs(path, &buf); + if (i == 0) { + XPUSHs(sv_2mortal(newSVnv(buf.f_bavail))); + XPUSHs(sv_2mortal(newSVnv(buf.f_bfree))); + XPUSHs(sv_2mortal(newSVnv(buf.f_blocks))); + XPUSHs(sv_2mortal(newSVnv(buf.f_bsize))); + XPUSHs(sv_2mortal(newSVnv(buf.f_ffree))); + XPUSHs(sv_2mortal(newSVnv(buf.f_files))); + XPUSHs(sv_2mortal(newSVnv(buf.f_type))); + XPUSHs(sv_2mortal(newSVnv(buf.f_fsid[0]))); + XPUSHs(sv_2mortal(newSVnv(buf.f_fsid[1]))); + } else { + XPUSHs(sv_2mortal(newSVnv(errno))); + } + +You'll also need to add the following code to the top of the .xs file, just +after the include of "XSUB.h": + + #include <sys/vfs.h> + +Also add the following code segment to test.pl while incrementing the "1..9" +string in the BEGIN block to "1..11": + + @a = &Mytest::statfs("/blech"); + print ((scalar(@a) == 1 && $a[0] == 2) ? "ok 10\n" : "not ok 10\n"); + @a = &Mytest::statfs("/"); + print scalar(@a) == 9 ? 
"ok 11\n" : "not ok 11\n"; + +=head2 New Things in this Example + +This example added quite a few new concepts. We'll take them one at a time. + +=over 4 + +=item * + +The INIT: directive contains code that will be placed immediately after +the argument stack is decoded. C does not allow variable declarations at +arbitrary locations inside a function, +so this is usually the best way to declare local variables needed by the XSUB. +(Alternatively, one could put the whole C<PPCODE:> section into braces, and +put these declarations on top.) + +=item * + +This routine also returns a different number of arguments depending on the +success or failure of the call to statfs. If there is an error, the error +number is returned as a single-element array. If the call is successful, +then a 9-element array is returned. Since only one argument is passed into +this function, we need room on the stack to hold the 9 values which may be +returned. + +We do this by using the PPCODE: directive, rather than the CODE: directive. +This tells B<xsubpp> that we will be managing the return values that will be +put on the argument stack by ourselves. + +=item * + +When we want to place values to be returned to the caller onto the stack, +we use the series of macros that begin with "XPUSH". There are five +different versions, for placing integers, unsigned integers, doubles, +strings, and Perl scalars on the stack. In our example, we placed a +Perl scalar onto the stack. (In fact this is the only macro which +can be used to return multiple values.) + +The XPUSH* macros will automatically extend the return stack to prevent +it from being overrun. You push values onto the stack in the order you +want them seen by the calling program. + +=item * + +The values pushed onto the return stack of the XSUB are actually mortal SV's. +They are made mortal so that once the values are copied by the calling +program, the SV's that held the returned values can be deallocated. 
+If they were not mortal, then they would continue to exist after the XSUB +routine returned, but would not be accessible. This is a memory leak. + +=item * + +If we were interested in performance, not in code compactness, in the success +branch we would not use C<XPUSHs> macros, but C<PUSHs> macros, and would +pre-extend the stack before pushing the return values: + + EXTEND(SP, 9); + +The tradeoff is that one needs to calculate the number of return values +in advance (though overextending the stack will not typically hurt +anything but memory consumption). + +Similarly, in the failure branch we could use C<PUSHs> I<without> extending +the stack: the Perl function reference comes to an XSUB on the stack, thus +the stack is I<always> large enough to take one return value. + +=back + +=head2 EXAMPLE 6 (Coming Soon) + +Passing in and returning references to arrays and/or hashes + +=head2 EXAMPLE 7 (Coming Soon) + +XPUSH args AND set RETVAL AND assign return value to array + +=head2 EXAMPLE 8 (Coming Soon) + +Setting $! + +=head2 EXAMPLE 9 (Coming Soon) + +Getting fd's from filehandles + +=head2 Troubleshooting these Examples + +As mentioned at the top of this document, if you are having problems with +these example extensions, you might see if any of these help you. + +=over 4 + +=item * + +In versions of 5.002 prior to the gamma version, the test script in Example +1 will not function properly. You need to change the "use lib" line to +read: + + use lib './blib'; + +=item * + +In versions of 5.002 prior to version 5.002b1h, the test.pl file was not +automatically created by h2xs. This means that you cannot say "make test" +to run the test script. 
You will need to add the following line before the +"use extension" statement: + + use lib './blib'; + +=item * + +In versions 5.000 and 5.001, instead of using the above line, you will need +to use the following line: + + BEGIN { unshift(@INC, "./blib") } + +=item * + +This document assumes that the executable named "perl" is Perl version 5. +Some systems may have installed Perl version 5 as "perl5". + +=back + +=head1 See also For more information, consult L<perlguts>, L<perlxs>, L<perlmod>, and L<perlpod>. -=head2 Author +=head1 Author Jeff Okamoto <F<okamoto@corp.hp.com>> @@ -736,4 +1124,4 @@ and Tim Bunce. =head2 Last Changed -1996/7/10 +1999/11/30 diff --git a/contrib/perl5/pod/pod2latex.PL b/contrib/perl5/pod/pod2latex.PL index feed98e..71115f3 100644 --- a/contrib/perl5/pod/pod2latex.PL +++ b/contrib/perl5/pod/pod2latex.PL @@ -101,7 +101,6 @@ print OUT <<'!NO!SUBS!'; # Translation of HTML escapes of various European accents might be wrong. -$/ = ""; # record separator is blank lines # TeX special characters. ##$tt_ables = "!@*()-=+|;:'\"`,./?<>"; $backslash_escapables = "#\$%&{}_"; @@ -119,13 +118,16 @@ $indent = 0; # parse the pods, produce LaTeX. 
-open(POD,"<$ARGV[0]") || die "cant open $ARGV[0]"; +use Pod::Plainer; +open(POD,"-|") or Pod::Plainer -> new() -> parse_from_file($ARGV[0]), exit; + ($pod=$ARGV[0]) =~ s/\.pod$//; open(LATEX,">$pod.tex"); &do_hdr(); $cutting = 1; $begun = ""; +$/ = ""; # record separator is blank lines while (<POD>) { if ($cutting) { next unless /^=/; @@ -314,6 +316,8 @@ while (<POD>) { } }gex; + s/X<([^<>]*)>/\\index{$1}/g; + s/Z<>/\\&/g; # the "don't format me" thing # comes last because not subject to reprocessing @@ -416,7 +420,7 @@ while (<POD>) { } print LATEX "\n\\begin{$listingcmd}\n"; push(@listingcmd,$listingcmd); - } elsif ($lastcmd ne 'item') { + } elsif ( !@listingcmd ) { warn "Illegal '=item' command without preceding 'over':"; warn "=item $bareitem"; } diff --git a/contrib/perl5/pod/pod2man.PL b/contrib/perl5/pod/pod2man.PL index 3c55d6e..bf35cff 100644 --- a/contrib/perl5/pod/pod2man.PL +++ b/contrib/perl5/pod/pod2man.PL @@ -9,7 +9,6 @@ use Cwd; # have to mention them as if they were shell variables, not # %Config entries. Thus you write # $startperl -# $man3ext # to ensure Configure will look for $Config{startperl}. # This forces PL files to create target in same directory as PL file. @@ -29,1191 +28,445 @@ print "Extracting $file (with variable substitutions)\n"; print OUT <<"!GROK!THIS!"; $Config{startperl} eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' - if \$running_under_some_shell; - -\$DEF_PM_SECTION = '$Config{man3ext}' || '3'; + if \$running_under_some_shell; !GROK!THIS! # In the following, perl variables are not expanded during extraction. 
print OUT <<'!NO!SUBS!'; -=head1 NAME - -pod2man - translate embedded Perl pod directives into man pages - -=head1 SYNOPSIS - -B<pod2man> -[ B<--section=>I<manext> ] -[ B<--release=>I<relpatch> ] -[ B<--center=>I<string> ] -[ B<--date=>I<string> ] -[ B<--fixed=>I<font> ] -[ B<--official> ] -[ B<--lax> ] -I<inputfile> - -=head1 DESCRIPTION - -B<pod2man> converts its input file containing embedded pod directives (see -L<perlpod>) into nroff source suitable for viewing with nroff(1) or -troff(1) using the man(7) macro set. - -Besides the obvious pod conversions, B<pod2man> also takes care of -func(), func(n), and simple variable references like $foo or @bar so -you don't have to use code escapes for them; complex expressions like -C<$fred{'stuff'}> will still need to be escaped, though. Other nagging -little roffish things that it catches include translating the minus in -something like foo-bar, making a long dash--like this--into a real em -dash, fixing up "paired quotes", putting a little space after the -parens in something like func(), making C++ and PI look right, making -double underbars have a little tiny space between them, making ALLCAPS -a teeny bit smaller in troff(1), and escaping backslashes so you don't -have to. - -=head1 OPTIONS - -=over 8 - -=item center - -Set the centered header to a specific string. The default is -"User Contributed Perl Documentation", unless the C<--official> flag is -given, in which case the default is "Perl Programmers Reference Guide". - -=item date - -Set the left-hand footer string to this value. By default, -the modification date of the input file will be used. - -=item fixed - -The fixed font to use for code refs. Defaults to CW. - -=item official - -Set the default header to indicate that this page is of -the standard release in case C<--center> is not given. - -=item release - -Set the centered footer. By default, this is the current -perl release. - -=item section - -Set the section for the C<.TH> macro. 
The standard conventions on -sections are to use 1 for user commands, 2 for system calls, 3 for -functions, 4 for devices, 5 for file formats, 6 for games, 7 for -miscellaneous information, and 8 for administrator commands. This works -best if you put your Perl man pages in a separate tree, like -F</usr/local/perl/man/>. By default, section 1 will be used -unless the file ends in F<.pm> in which case section 3 will be selected. - -=item lax - -Don't complain when required sections aren't present. - -=back - -=head1 Anatomy of a Proper Man Page - -For those not sure of the proper layout of a man page, here's -an example of the skeleton of a proper man page. Head of the -major headers should be setout as a C<=head1> directive, and -are historically written in the rather startling ALL UPPER CASE -format, although this is not mandatory. -Minor headers may be included using C<=head2>, and are -typically in mixed case. - -=over 10 - -=item NAME +# pod2man -- Convert POD data to formatted *roff input. +# $Id: pod2man.PL,v 1.2 2000/03/16 21:08:23 eagle Exp $ +# +# Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. + +require 5.004; + +use Getopt::Long qw(GetOptions); +use Pod::Man (); +use Pod::Usage qw(pod2usage); + +use strict; + +# Insert -- into @ARGV before any single dash argument to hide it from +# Getopt::Long; we want to interpret it as meaning stdin (which Pod::Parser +# does correctly). +my $stdin; +@ARGV = map { $_ eq '-' && !$stdin++ ? ('--', $_) : $_ } @ARGV; + +# Parse our options, trying to retain backwards compatibility with pod2man +# but allowing short forms as well. --lax is currently ignored. 
+my %options; +Getopt::Long::config ('bundling_override'); +GetOptions (\%options, 'section|s=s', 'release|r=s', 'center|c=s', + 'date|d=s', 'fixed=s', 'fixedbold=s', 'fixeditalic=s', + 'fixedbolditalic=s', 'official|o', 'lax|l', 'help|h') or exit 1; +pod2usage (0) if $options{help}; + +# Official sets --center, but don't override things explicitly set. +if ($options{official} && !defined $options{center}) { + $options{center} = 'Perl Programmers Reference Guide'; +} -Mandatory section; should be a comma-separated list of programs or -functions documented by this podpage, such as: +# Initialize and run the formatter. +my $parser = Pod::Man->new (%options); +$parser->parse_from_file (@ARGV); - foo, bar - programs to do something +__END__ -=item SYNOPSIS +=head1 NAME -A short usage summary for programs and functions, which -may someday be deemed mandatory. +pod2man - Convert POD data to formatted *roff input -=item DESCRIPTION +=head1 SYNOPSIS -Long drawn out discussion of the program. It's a good idea to break this -up into subsections using the C<=head2> directives, like +pod2man [B<--section>=I<manext>] [B<--release>=I<version>] +[B<--center>=I<string>] [B<--date>=I<string>] [B<--fixed>=I<font>] +[B<--fixedbold>=I<font>] [B<--fixeditalic>=I<font>] +[B<--fixedbolditalic>=I<font>] [B<--official>] [B<--lax>] [I<input> +[I<output>]] - =head2 A Sample Subection +pod2man B<--help> - =head2 Yet Another Sample Subection +=head1 DESCRIPTION -=item OPTIONS +B<pod2man> is a front-end for Pod::Man, using it to generate *roff input +from POD source. The resulting *roff code is suitable for display on a +terminal using nroff(1), normally via man(1), or printing using troff(1). + +I<input> is the file to read for POD source (the POD can be embedded in +code). If I<input> isn't given, it defaults to STDIN. I<output>, if given, +is the file to which to write the formatted output. If I<output> isn't +given, the formatted output is written to STDOUT. 
+ +B<--section>, B<--release>, B<--center>, B<--date>, and B<--official> can be +used to set the headers and footers to use; if not given, Pod::Man will +assume various defaults. See below or L<Pod::Man> for details. + +B<pod2man> assumes that your *roff formatters have a fixed-width font named +CW. If yours is called something else (like CR), use B<--fixed> to specify +it. This generally only matters for troff output for printing. Similarly, +you can set the fonts used for bold, italic, and bold italic fixed-width +output. + +Besides the obvious pod conversions, Pod::Man, and therefore pod2man, also +takes care of formatting func(), func(n), and simple variable references +like $foo or @bar so you don't have to use code escapes for them; complex +expressions like C<$fred{'stuff'}> will still need to be escaped, though. +It also translates dashes that aren't used as hyphens into en dashes, makes +long dashes--like this--into proper em dashes, fixes "paired quotes," and +takes care of several other troff-specific tweaks. See L<Pod::Man> for +complete information. -Some people make this separate from the description. +=head1 OPTIONS -=item RETURN VALUE +=over 4 -What the program or function returns if successful. +=item B<-c> I<string>, B<--center>=I<string> -=item ERRORS +Sets the centered page header to I<string>. The default is "User +Contributed Perl Documentation", but also see B<--official> below. -Exceptions, return codes, exit stati, and errno settings. +=item B<-d> I<string>, B<--date>=I<string> -=item EXAMPLES +Set the left-hand footer string to this value. By default, the modification +date of the input file will be used, or the current date if input comes from +STDIN. -Give some example uses of the program. +=item B<--fixed>=I<font> -=item ENVIRONMENT +The fixed-width font to use for verbatim text and code. Defaults to CW. +Some systems may want CR instead. Only matters for troff(1) output. -Envariables this program might care about. 
+=item B<--fixedbold>=I<font> -=item FILES +Bold version of the fixed-width font. Defaults to CB. Only matters for +troff(1) output. -All files used by the program. You should probably use the FE<lt>E<gt> -for these. +=item B<--fixeditalic>=I<font> -=item SEE ALSO +Italic version of the fixed-width font (actually, something of a misnomer, +since most fixed-width fonts only have an oblique version, not an italic +version). Defaults to CI. Only matters for troff(1) output. -Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8). +=item B<--fixedbolditalic>=I<font> -=item NOTES +Bold italic (probably actually oblique) version of the fixed-width font. +Pod::Man doesn't assume you have this, and defaults to CB. Some systems +(such as Solaris) have this font available as CX. Only matters for troff(1) +output. -Miscellaneous commentary. +=item B<-h>, B<--help> -=item CAVEATS +Print out usage information. -Things to take special care with; sometimes called WARNINGS. +=item B<-l>, B<--lax> -=item DIAGNOSTICS +Don't complain when required sections are missing. Not currently used, as +POD checking functionality is not yet implemented in Pod::Man. -All possible messages the program can print out--and -what they mean. +=item B<-o>, B<--official> -=item BUGS +Set the default header to indicate that this page is part of the standard +Perl release, if B<--center> is not also given. -Things that are broken or just don't work quite right. +=item B<-r>, B<--release> -=item RESTRICTIONS +Set the centered footer. By default, this is the version of Perl you run +B<pod2man> under. Note that some system an macro sets assume that the +centered footer will be a modification date and will prepend something like +"Last modified: "; if this is the case, you may want to set B<--release> to +the last modified date and B<--date> to the version number. -Bugs you don't plan to fix :-) +=item B<-s>, B<--section> -=item AUTHOR +Set the section for the C<.TH> macro. 
The standard section numbering +convention is to use 1 for user commands, 2 for system calls, 3 for +functions, 4 for devices, 5 for file formats, 6 for games, 7 for +miscellaneous information, and 8 for administrator commands. There is a lot +of variation here, however; some systems (like Solaris) use 4 for file +formats, 5 for miscellaneous information, and 7 for devices. Still others +use 1m instead of 8, or some mix of both. About the only section numbers +that are reliably consistent are 1, 2, and 3. -Who wrote it (or AUTHORS if multiple). +By default, section 1 will be used unless the file ends in .pm in which case +section 3 will be selected. -=item HISTORY +=back -Programs derived from other sources sometimes have this, or -you might keep a modification log here. +=head1 DIAGNOSTICS -=back +If B<pod2man> fails with errors, see L<Pod::Man> and L<Pod::Parser> for +information about what those errors might mean. =head1 EXAMPLES pod2man program > program.1 - pod2man some_module.pm > /usr/perl/man/man3/some_module.3 + pod2man SomeModule.pm /usr/perl/man/man3/SomeModule.3 pod2man --section=7 note.pod > note.7 -=head1 DIAGNOSTICS +If you would like to print out a lot of man pages continuously, you probably +want to set the C and D registers to set contiguous page numbering and +even/odd paging, at least on some versions of man(7). -The following diagnostics are generated by B<pod2man>. Items -marked "(W)" are non-fatal, whereas the "(F)" errors will cause -B<pod2man> to immediately exit with a non-zero status. + troff -man -rC1 -rD1 perl.1 perldata.1 perlsyn.1 ... -=over 4 +To get index entries on stderr, turn on the F register, as in: -=item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s> + troff -man -rF1 perl.1 -(W) If you start include an option, you should set it off -as bold, italic, or code. +The indexing merely outputs messages via C<.tm> for each major page, +section, subsection, item, and any C<XE<lt>E<gt>> directives. 
See +L<Pod::Man> for more details. -=item can't open %s: %s +=head1 BUGS -(F) The input file wasn't available for the given reason. +Lots of this documentation is duplicated from L<Pod::Man>. -=item Improper man page - no dash in NAME header in paragraph %d of %s +POD checking and the corresponding B<--lax> option don't work yet. -(W) The NAME header did not have an isolated dash in it. This is -considered important. +=head1 NOTES -=item Invalid man page - no NAME line in %s +For those not sure of the proper layout of a man page, here are some notes +on writing a proper man page. -(F) You did not include a NAME header, which is essential. +The name of the program being documented is conventionally written in bold +(using BE<lt>E<gt>) wherever it occurs, as are all program options. +Arguments should be written in italics (IE<lt>E<gt>). Functions are +traditionally written in italics; if you write a function as function(), +Pod::Man will take care of this for you. Literal code or commands should +be in CE<lt>E<gt>. References to other man pages should be in the form +C<manpage(section)>, and Pod::Man will automatically format those +appropriately. As an exception, it's traditional not to use this form when +referring to module documentation; use C<LE<lt>Module::NameE<gt>> instead. -=item roff font should be 1 or 2 chars, not `%s' (F) +References to other programs or functions are normally in the form of man +page references so that cross-referencing tools can provide the user with +links and the like. It's possible to overdo this, though, so be careful not +to clutter your documentation with too much markup. -(F) The font specified with the C<--fixed> option was not -a one- or two-digit roff font. +The major headers should be set out using a C<=head1> directive, and are +historically written in the rather startling ALL UPPER CASE format, although +this is not mandatory. Minor headers may be included using C<=head2>, and +are typically in mixed case. 
-=item %s is missing required section: %s +The standard sections of a manual page are: -(W) Required sections include NAME, DESCRIPTION, and if you're -using a section starting with a 3, also a SYNOPSIS. Actually, -not having a NAME is a fatal. +=over 4 -=item Unknown escape: %s in %s +=item NAME -(W) An unknown HTML entity (probably for an 8-bit character) was given via -a C<EE<lt>E<gt>> directive. Besides amp, lt, gt, and quot, recognized -entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave, -Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute, -Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc, -icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc, -ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig, -THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml, -Yacute, yacute, and yuml. +Mandatory section; should be a comma-separated list of programs or functions +documented by this podpage, such as: -=item Unmatched =back + foo, bar - programs to do something -(W) You have a C<=back> without a corresponding C<=over>. +Manual page indexers are often extremely picky about the format of this +section, so don't put anything in it except this line. A single dash, and +only a single dash, should separate the list of programs or functions from +the description. Functions should not be qualified with C<()> or the like. +The description should ideally fit on a single line, even if a man program +replaces the dash with a few tabs. -=item Unrecognized pod directive: %s +=item SYNOPSIS -(W) You specified a pod directive that isn't in the known list of -C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>. +A short usage summary for programs and functions. This section is mandatory +for section 3 pages. 
+=item DESCRIPTION -=back +Extended description and discussion of the program or functions, or the body +of the documentation for man pages that document something else. If +particularly long, it's a good idea to break this up into subsections +using C<=head2> directives like: -=head1 NOTES + =head2 Normal Usage -If you would like to print out a lot of man page continuously, you -probably want to set the C and D registers to set contiguous page -numbering and even/odd paging, at least on some versions of man(7). -Settting the F register will get you some additional experimental -indexing: + =head2 Advanced Features - troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ... + =head2 Writing Configuration Files -The indexing merely outputs messages via C<.tm> for each -major page, section, subsection, item, and any C<XE<lt>E<gt>> -directives. +or whatever is appropriate for your documentation. +=item OPTIONS -=head1 RESTRICTIONS +Detailed description of each of the command-line options taken by the +program. This should be separate from the description for the use of things +like L<Pod::Usage|Pod::Usage>. This is normally presented as a list, with +each option as a separate C<=item>. The specific option string should be +enclosed in BE<lt>E<gt>. Any values that the option takes should be +enclosed in IE<lt>E<gt>. For example, the section for the option +B<--section>=I<manext> would be introduced with: -None at this time. + =item B<--section>=I<manext> -=head1 BUGS +Synonymous options (like both the short and long forms) are separated by a +comma and a space on the same C<=item> line, or optionally listed as their +own item with a reference to the canonical name. For example, since +B<--section> can also be written as B<-s>, the above would be: -The =over and =back directives don't really work right. They -take absolute positions instead of offsets, don't nest well, and -making people count is suboptimal in any event. 
+ =item B<-s> I<manext>, B<--section>=I<manext> -=head1 AUTHORS +(Writing the short option first is arguably easier to read, since the long +option is long enough to draw the eye to it anyway and the short option can +otherwise get lost in visual noise.) -Original prototype by Larry Wall, but so massively hacked over by -Tom Christiansen such that Larry probably doesn't recognize it anymore. +=item RETURN VALUE -=cut +What the program or function returns, if successful. This section can be +omitted for programs whose precise exit codes aren't important, provided +they return 0 on success as is standard. It should always be present for +functions. -$/ = ""; -$cutting = 1; -@Indices = (); - -# We try first to get the version number from a local binary, in case we're -# running an installed version of Perl to produce documentation from an -# uninstalled newer version's pod files. -if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') { - my $perl = (-x './perl' && -f './perl' ) ? - './perl' : - ((-x '../perl' && -f '../perl') ? 
- '../perl' : - ''); - ($version,$patch) = `$perl -e 'print $]'` =~ /^(\d\.\d{3})(\d{2})?/ if $perl; -} -# No luck; we'll just go with the running Perl's version -($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version; -$DEF_RELEASE = "perl $version"; -$DEF_RELEASE .= ", patch $patch" if $patch; - - -sub makedate { - my $secs = shift; - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs); - my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon]; - $year += 1900; - return "$mday/$mname/$year"; -} +=item ERRORS -use Getopt::Long; - -$DEF_SECTION = 1; -$DEF_CENTER = "User Contributed Perl Documentation"; -$STD_CENTER = "Perl Programmers Reference Guide"; -$DEF_FIXED = 'CW'; -$DEF_LAX = 0; - -sub usage { - warn "$0: @_\n" if @_; - die <<EOF; -usage: $0 [options] podpage -Options are: - --section=manext (default "$DEF_SECTION") - --release=relpatch (default "$DEF_RELEASE") - --center=string (default "$DEF_CENTER") - --date=string (default "$DEF_DATE") - --fixed=font (default "$DEF_FIXED") - --official (default NOT) - --lax (default NOT) -EOF -} +Exceptions, error return codes, exit statuses, and errno settings. +Typically used for function documentation; program documentation uses +DIAGNOSTICS instead. The general rule of thumb is that errors printed to +STDOUT or STDERR and intended for the end user are documented in DIAGNOSTICS +while errors passed internal to the calling program and intended for other +programmers are documented in ERRORS. When documenting a function that sets +errno, a full list of the possible errno values should be given here. -$uok = GetOptions( qw( - section=s - release=s - center=s - date=s - fixed=s - official - lax - help)); +=item DIAGNOSTICS -$DEF_DATE = makedate((stat($ARGV[0]))[9] || time()); +All possible messages the program can print out--and what they mean. 
You +may wish to follow the same documentation style as the Perl documentation; +see perldiag(1) for more details (and look at the POD source as well). -usage("Usage error!") unless $uok; -usage() if $opt_help; -usage("Need one and only one podpage argument") unless @ARGV == 1; +If applicable, please include details on what the user should do to correct +the error; documenting an error as indicating "the input buffer is too +small" without telling the user how to increase the size of the input buffer +(or at least telling them that it isn't possible) isn't very useful. -$section = $opt_section || ($ARGV[0] =~ /\.pm$/ - ? $DEF_PM_SECTION : $DEF_SECTION); -$RP = $opt_release || $DEF_RELEASE; -$center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER); -$lax = $opt_lax || $DEF_LAX; +=item EXAMPLES -$CFont = $opt_fixed || $DEF_FIXED; +Give some example uses of the program or function. Don't skimp; users often +find this the most useful part of the documentation. The examples are +generally given as verbatim paragraphs. -if (length($CFont) == 2) { - $CFont_embed = "\\f($CFont"; -} -elsif (length($CFont) == 1) { - $CFont_embed = "\\f$CFont"; -} -else { - die "roff font should be 1 or 2 chars, not `$CFont_embed'"; -} +Don't just present an example without explaining what it does. Adding a +short paragraph saying what the example will do can increase the value of +the example immensely. -$date = $opt_date || $DEF_DATE; +=item ENVIRONMENT -for (qw{NAME DESCRIPTION}) { -# for (qw{NAME DESCRIPTION AUTHOR}) { - $wanna_see{$_}++; -} -$wanna_see{SYNOPSIS}++ if $section =~ /^3/; +Environment variables that the program cares about, normally presented as a +list using C<=over>, C<=item>, and C<=back>. For example: + =over 6 -$name = @ARGV ? 
$ARGV[0] : "<STDIN>"; -$Filename = $name; -if ($section =~ /^1/) { - require File::Basename; - $name = uc File::Basename::basename($name); -} -$name =~ s/\.(pod|p[lm])$//i; - -# Lose everything up to the first of -# */lib/*perl* standard or site_perl module -# */*perl*/lib from -D prefix=/opt/perl -# */*perl*/ random module hierarchy -# which works. -$name =~ s-//+-/-g; -if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i - or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i - or $name =~ s-^.*?/[^/]*perl[^/]*/--i) { - # Lose ^site(_perl)?/. - $name =~ s-^site(_perl)?/--; - # Lose ^arch/. (XXX should we use Config? Just for archname?) - $name =~ s~^(.*-$^O|$^O-.*)/~~o; - # Lose ^version/. - $name =~ s-^\d+\.\d+/--; -} + =item HOME -# Translate Getopt/Long to Getopt::Long, etc. -$name =~ s(/)(::)g; - -if ($name ne 'something') { - FCHECK: { - open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!"; - while (<F>) { - next unless /^=\b/; - if (/^=head1\s+NAME\s*$/) { # an /m would forgive mistakes - $_ = <F>; - unless (/\s*-+\s+/) { - $oops++; - warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n" - } else { - my @n = split /\s+-+\s+/; - if (@n != 2) { - $oops++; - warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n" - } - else { - %namedesc = @n; - } - } - last FCHECK; - } - next if /^=cut\b/; # DB_File and Net::Ping have =cut before NAME - next if /^=pod\b/; # It is OK to have =pod before NAME - die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax; - } - die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax; - } - close F; -} + Used to determine the user's home directory. F<.foorc> in this + directory is read for configuration details, if it exists. -print <<"END"; -.rn '' }` -''' \$RCSfile\$\$Revision\$\$Date\$ -''' -''' \$Log\$ -''' -.de Sh -.br -.if t .Sp -.ne 5 -.PP -\\fB\\\\\$1\\fR -.PP -.. -.de Sp -.if t .sp .5v -.if n .sp -.. 
-.de Ip -.br -.ie \\\\n(.\$>=3 .ne \\\\\$3 -.el .ne 3 -.IP "\\\\\$1" \\\\\$2 -.. -.de Vb -.ft $CFont -.nf -.ne \\\\\$1 -.. -.de Ve -.ft R - -.fi -.. -''' -''' -''' Set up \\*(-- to give an unbreakable dash; -''' string Tr holds user defined translation string. -''' Bell System Logo is used as a dummy character. -''' -.tr \\(*W-|\\(bv\\*(Tr -.ie n \\{\\ -.ds -- \\(*W- -.ds PI pi -.if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch -.if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch -.ds L" "" -.ds R" "" -''' \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of -''' \\*(L" and \\*(R", except that they are used on ".xx" lines, -''' such as .IP and .SH, which do another additional levels of -''' double-quote interpretation -.ds M" """ -.ds S" """ -.ds N" """"" -.ds T" """"" -.ds L' ' -.ds R' ' -.ds M' ' -.ds S' ' -.ds N' ' -.ds T' ' -'br\\} -.el\\{\\ -.ds -- \\(em\\| -.tr \\*(Tr -.ds L" `` -.ds R" '' -.ds M" `` -.ds S" '' -.ds N" `` -.ds T" '' -.ds L' ` -.ds R' ' -.ds M' ` -.ds S' ' -.ds N' ` -.ds T' ' -.ds PI \\(*p -'br\\} -END - -print <<'END'; -.\" If the F register is turned on, we'll generate -.\" index entries out stderr for the following things: -.\" TH Title -.\" SH Header -.\" Sh Subsection -.\" Ip Item -.\" X<> Xref (embedded -.\" Of course, you have to process the output yourself -.\" in some meaninful fashion. -.if \nF \{ -.de IX -.tm Index:\\$1\t\\n%\t"\\$2" -.. 
-.nr % 0 -.rr F -.\} -END - -print <<"END"; -.TH $name $section "$RP" "$date" "$center" -.UC -END - -push(@Indices, qq{.IX Title "$name $section"}); - -while (($name, $desc) = each %namedesc) { - for ($name, $desc) { s/^\s+//; s/\s+$//; } - push(@Indices, qq(.IX Name "$name - $desc"\n)); -} + =back -print <<'END'; -.if n .hy 0 -.if n .na -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.de CQ \" put $1 in typewriter font -END -print ".ft $CFont\n"; -print <<'END'; -'if n "\c -'if t \\&\\$1\c -'if n \\&\\$1\c -'if n \&" -\\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7 -'.ft R -.. -.\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2 -. \" AM - accent mark definitions -.bd B 3 -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds ? ? -. ds ! ! -. ds / -. ds q -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10' -. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#] -.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u' -.ds . 
\\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u' -.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#] -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -.ds oe o\h'-(\w'o'u*4/10)'e -.ds Oe O\h'-(\w'O'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds v \h'-1'\o'\(aa\(ga' -. ds _ \h'-1'^ -. ds . \h'-1'. -. ds 3 3 -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -. ds oe oe -. ds Oe OE -.\} -.rm #[ #] #H #V #F C -END - -$indent = 0; - -$begun = ""; - -# Unrolling [^A-Z>]|[A-Z](?!<) gives: // MRE pp 165. -my $nonest = '(?:[^A-Z>]*(?:[A-Z](?!<)[^A-Z>]*)*)'; - -while (<>) { - if ($cutting) { - next unless /^=/; - $cutting = 0; - } - if ($begun) { - if (/^=end\s+$begun/) { - $begun = ""; - } - elsif ($begun =~ /^(roff|man)$/) { - print STDOUT $_; - } - next; - } - chomp; - - # Translate verbatim paragraph - - if (/^\s/) { - @lines = split(/\n/); - for (@lines) { - 1 while s - {^( [^\t]* ) \t ( \t* ) } - { $1 . 
' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex; - s/\\/\\e/g; - s/\A/\\&/s; - } - $lines = @lines; - makespace() unless $verbatim++; - print ".Vb $lines\n"; - print join("\n", @lines), "\n"; - print ".Ve\n"; - $needspace = 0; - next; - } - - $verbatim = 0; - - if (/^=for\s+(\S+)\s*/s) { - if ($1 eq "man" or $1 eq "roff") { - print STDOUT $',"\n\n"; - } else { - # ignore unknown for - } - next; - } - elsif (/^=begin\s+(\S+)\s*/s) { - $begun = $1; - if ($1 eq "man" or $1 eq "roff") { - print STDOUT $'."\n\n"; - } - next; - } - - # check for things that'll hosed our noremap scheme; affects $_ - init_noremap(); - - if (!/^=item/) { - - # trofficate backslashes; must do it before what happens below - s/\\/noremap('\\e')/ge; - - # protect leading periods and quotes against *roff - # mistaking them for directives - s/^(?:[A-Z]<)?[.']/\\&$&/gm; - - # first hide the escapes in case we need to - # intuit something and get it wrong due to fmting - - 1 while s/([A-Z]<$nonest>)/noremap($1)/ge; - - # func() is a reference to a perl function - s{ - \b - ( - [:\w]+ \(\) - ) - } {I<$1>}gx; - - # func(n) is a reference to a perl function or a man page - s{ - ([:\w]+) - ( - \( [^\051]+ \) - ) - } {I<$1>\\|$2}gx; - - # convert simple variable references - s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g; - - if (m{ ( - [\-\w]+ - \( - [^\051]*? - [\@\$,] - [^\051]*? - \) - ) - }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/) - { - warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n"; - $oops++; - } - - while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) { - warn "$0: bad option in paragraph $. 
of $ARGV: ``$1'' should be [CB]<$1>\n"; - $oops++; - } - - # put it back so we get the <> processed again; - clear_noremap(0); # 0 means leave the E's - - } else { - # trofficate backslashes - s/\\/noremap('\\e')/ge; - - } - - # need to hide E<> first; they're processed in clear_noremap - s/(E<[^<>]+>)/noremap($1)/ge; - - - $maxnest = 10; - while ($maxnest-- && /[A-Z]</) { - - # can't do C font here - s/([BI])<($nonest)>/font($1) . $2 . font('R')/eg; - - # files and filelike refs in italics - s/F<($nonest)>/I<$1>/g; - - # no break -- usually we want C<> for this - s/S<($nonest)>/nobreak($1)/eg; - - # LREF: a la HREF L<show this text|man/section> - s:L<([^|>]+)\|[^>]+>:$1:g; - - # LREF: a manpage(3f) - s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g; - - # LREF: an =item on another manpage - s{ - L< - ([^/]+) - / - ( - [:\w]+ - (\(\))? - ) - > - } {the C<$2> entry in the I<$1> manpage}gx; - - # LREF: an =item on this manpage - s{ - ((?: - L< - / - ( - [:\w]+ - (\(\))? - ) - > - (,?\s+(and\s+)?)? - )+) - } { internal_lrefs($1) }gex; - - # LREF: a =head2 (head1?), maybe on a manpage, maybe right here - # the "func" can disambiguate - s{ - L< - (?: - ([a-zA-Z]\S+?) / - )? - "?(.*?)"? - > - }{ - do { - $1 # if no $1, assume it means on this page. - ? "the section on I<$2> in the I<$1> manpage" - : "the section on I<$2>" - } - }gesx; # s in case it goes over multiple lines, so . matches \n - - s/Z<>/\\&/g; - - # comes last because not subject to reprocessing - s/C<($nonest)>/noremap("${CFont_embed}${1}\\fR")/eg; - } - - if (s/^=//) { - $needspace = 0; # Assume this. 
- - s/\n/ /g; - - ($Cmd, $_) = split(' ', $_, 2); - - $dotlevel = 1; - if ($Cmd eq 'head1') { - $dotlevel = 1; - } - elsif ($Cmd eq 'head2') { - $dotlevel = 1; - } - elsif ($Cmd eq 'item') { - $dotlevel = 2; - } - - if (defined $_) { - &escapes($dotlevel); - s/"/""/g; - } - - clear_noremap(1); - - if ($Cmd eq 'cut') { - $cutting = 1; - } - elsif ($Cmd eq 'head1') { - s/\s+$//; - delete $wanna_see{$_} if exists $wanna_see{$_}; - print qq{.SH "$_"\n}; - push(@Indices, qq{.IX Header "$_"\n}); - } - elsif ($Cmd eq 'head2') { - print qq{.Sh "$_"\n}; - push(@Indices, qq{.IX Subsection "$_"\n}); - } - elsif ($Cmd eq 'over') { - push(@indent,$indent); - $indent += ($_ + 0) || 5; - } - elsif ($Cmd eq 'back') { - $indent = pop(@indent); - warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent; - $needspace = 1; - } - elsif ($Cmd eq 'item') { - s/^\*( |$)/\\(bu$1/g; - # if you know how to get ":s please do - s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g; - s/\\\*\(L"([^"]+?)""/'$1'/g; - s/[^"]""([^"]+?)""[^"]/'$1'/g; - # here do something about the $" in perlvar? - print STDOUT qq{.Ip "$_" $indent\n}; - push(@Indices, qq{.IX Item "$_"\n}); - } - elsif ($Cmd eq 'pod') { - # this is just a comment - } - else { - warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n"; - } - } - else { - if ($needspace) { - &makespace; - } - &escapes(0); - clear_noremap(1); - print $_, "\n"; - $needspace = 1; - } -} +Since environment variables are normally in all uppercase, no additional +special formatting is generally needed; they're glaring enough as it is. -print <<"END"; +=item FILES -.rn }` '' -END +All files used by the program or function, normally presented as a list, and +what it uses them for. File names should be enclosed in FE<lt>E<gt>. It's +particularly important to document files that will be potentially modified. -if (%wanna_see && !$lax) { - @missing = keys %wanna_see; - warn "$0: $Filename is missing required section" - . (@missing > 1 && "s") - . 
": @missing\n"; - $oops++; -} +=item CAVEATS -foreach (@Indices) { print "$_\n"; } +Things to take special care with, sometimes called WARNINGS. -exit; -#exit ($oops != 0); +=item BUGS -######################################################################### +Things that are broken or just don't work quite right. -sub nobreak { - my $string = shift; - $string =~ s/ /\\ /g; - $string; -} +=item RESTRICTIONS -sub escapes { - my $indot = shift; - - s/X<(.*?)>/mkindex($1)/ge; - - # translate the minus in foo-bar into foo\-bar for roff - s/([^0-9a-z-])-([^-])/$1\\-$2/g; - - # make -- into the string version \*(-- (defined above) - s/\b--\b/\\*(--/g; - s/"--([^"])/"\\*(--$1/g; # should be a better way - s/([^"])--"/$1\\*(--"/g; - - # fix up quotes; this is somewhat tricky - my $dotmacroL = 'L'; - my $dotmacroR = 'R'; - if ( $indot == 1 ) { - $dotmacroL = 'M'; - $dotmacroR = 'S'; - } - elsif ( $indot >= 2 ) { - $dotmacroL = 'N'; - $dotmacroR = 'T'; - } - if (!/""/) { - s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge; - s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge; - } - - #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g; - #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g; - - - # make sure that func() keeps a bit a space tween the parens - ### s/\b\(\)/\\|()/g; - ### s/\b\(\)/(\\|)/g; - - # make C++ into \*C+, which is a squinched version (defined above) - s/\bC\+\+/\\*(C+/g; - - # make double underbars have a little tiny space between them - s/__/_\\|_/g; - - # PI goes to \*(PI (defined above) - s/\bPI\b/noremap('\\*(PI')/ge; - - # make all caps a teeny bit smaller, but don't muck with embedded code literals - my $hidCFont = font('C'); - if ($Cmd !~ /^head1/) { # SH already makes smaller - # /g isn't enough; 1 while or we'll be off - -# 1 while s{ -# (?!$hidCFont)(..|^.|^) -# \b -# ( -# [A-Z][\/A-Z+:\-\d_$.]+ -# ) -# (s?) -# \b -# } {$1\\s-1$2\\s0}gmox; - - 1 while s{ - (?!$hidCFont)(..|^.|^) - ( - \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b - ) - } { - $1 . 
noremap( '\\s-1' . $2 . '\\s0' ) - }egmox; - - } -} +Bugs you don't plan to fix. :-) -# make troff just be normal, but make small nroff get quoted -# decided to just put the quotes in the text; sigh; -sub ccvt { - local($_,$prev) = @_; - noremap(qq{.CQ "$_" \n\\&}); -} +=item NOTES -sub makespace { - if ($indent) { - print ".Sp\n"; - } - else { - print ".PP\n"; - } -} +Miscellaneous commentary. -sub mkindex { - my ($entry) = @_; - my @entries = split m:\s*/\s*:, $entry; - push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries; - return ''; -} +=item SEE ALSO -sub font { - local($font) = shift; - return '\\f' . noremap($font); -} +Other man pages to check out, like man(1), man(7), makewhatis(8), or +catman(8). Normally a simple list of man pages separated by commas, or a +paragraph giving the name of a reference work. Man page references, if they +use the standard C<name(section)> form, don't have to be enclosed in +LE<lt>E<gt>, but other things in this section probably should be when +appropriate. You may need to use the C<LE<lt>...|...E<gt>> syntax to keep +B<pod2man> and B<pod2text> from being too verbose; see perlpod(1). -sub noremap { - local($thing_to_hide) = shift; - $thing_to_hide =~ tr/\000-\177/\200-\377/; - return $thing_to_hide; -} +If the package has a web site, include a URL here. -sub init_noremap { - # escape high bit characters in input stream - s/([\200-\377])/"E<".ord($1).">"/ge; -} +=item AUTHOR -sub clear_noremap { - my $ready_to_print = $_[0]; - - tr/\200-\377/\000-\177/; - - # trofficate backslashes - # s/(?!\\e)(?:..|^.|^)\\/\\e/g; - - # now for the E<>s, which have been hidden until now - # otherwise the interative \w<> processing would have - # been hosed by the E<gt> - s { - E< - ( - ( \d + ) - | ( [A-Za-z]+ ) - ) - > - } { - do { - defined $2 - ? chr($2) - : - exists $HTML_Escapes{$3} - ? do { $HTML_Escapes{$3} } - : do { - warn "$0: Unknown escape in paragraph $. 
of $ARGV: ``$&''\n"; - "E<$1>"; - } - } - }egx if $ready_to_print; -} +Who wrote it (use AUTHORS for multiple people). Including your current +e-mail address (or some e-mail address to which bug reports should be sent) +so that users have a way of contacting you is a good idea. Remember that +program documentation tends to roam the wild for far longer than you expect +and pick an e-mail address that's likely to last if possible. -sub internal_lrefs { - local($_) = shift; - local $trailing_and = s/and\s+$// ? "and " : ""; - - s{L</([^>]+)>}{$1}g; - my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); - my $retstr = "the "; - my $i; - for ($i = 0; $i <= $#items; $i++) { - $retstr .= "C<$items[$i]>"; - $retstr .= ", " if @items > 2 && $i != $#items; - $retstr .= " and " if $i+2 == @items; - } - - $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) - . " elsewhere in this document"; - # terminal space to avoid words running together (pattern used - # strips terminal spaces) - $retstr .= " " if length $trailing_and; - $retstr .= $trailing_and; - - return $retstr; +=item HISTORY -} +Programs derived from other sources sometimes have this, or you might keep a +modification log here. 
-BEGIN { -%HTML_Escapes = ( - 'amp' => '&', # ampersand - 'lt' => '<', # left chevron, less-than - 'gt' => '>', # right chevron, greater-than - 'quot' => '"', # double quote - - "Aacute" => "A\\*'", # capital A, acute accent - "aacute" => "a\\*'", # small a, acute accent - "Acirc" => "A\\*^", # capital A, circumflex accent - "acirc" => "a\\*^", # small a, circumflex accent - "AElig" => '\*(AE', # capital AE diphthong (ligature) - "aelig" => '\*(ae', # small ae diphthong (ligature) - "Agrave" => "A\\*`", # capital A, grave accent - "agrave" => "A\\*`", # small a, grave accent - "Aring" => 'A\\*o', # capital A, ring - "aring" => 'a\\*o', # small a, ring - "Atilde" => 'A\\*~', # capital A, tilde - "atilde" => 'a\\*~', # small a, tilde - "Auml" => 'A\\*:', # capital A, dieresis or umlaut mark - "auml" => 'a\\*:', # small a, dieresis or umlaut mark - "Ccedil" => 'C\\*,', # capital C, cedilla - "ccedil" => 'c\\*,', # small c, cedilla - "Eacute" => "E\\*'", # capital E, acute accent - "eacute" => "e\\*'", # small e, acute accent - "Ecirc" => "E\\*^", # capital E, circumflex accent - "ecirc" => "e\\*^", # small e, circumflex accent - "Egrave" => "E\\*`", # capital E, grave accent - "egrave" => "e\\*`", # small e, grave accent - "ETH" => '\\*(D-', # capital Eth, Icelandic - "eth" => '\\*(d-', # small eth, Icelandic - "Euml" => "E\\*:", # capital E, dieresis or umlaut mark - "euml" => "e\\*:", # small e, dieresis or umlaut mark - "Iacute" => "I\\*'", # capital I, acute accent - "iacute" => "i\\*'", # small i, acute accent - "Icirc" => "I\\*^", # capital I, circumflex accent - "icirc" => "i\\*^", # small i, circumflex accent - "Igrave" => "I\\*`", # capital I, grave accent - "igrave" => "i\\*`", # small i, grave accent - "Iuml" => "I\\*:", # capital I, dieresis or umlaut mark - "iuml" => "i\\*:", # small i, dieresis or umlaut mark - "Ntilde" => 'N\*~', # capital N, tilde - "ntilde" => 'n\*~', # small n, tilde - "Oacute" => "O\\*'", # capital O, acute accent - "oacute" => 
"o\\*'", # small o, acute accent - "Ocirc" => "O\\*^", # capital O, circumflex accent - "ocirc" => "o\\*^", # small o, circumflex accent - "Ograve" => "O\\*`", # capital O, grave accent - "ograve" => "o\\*`", # small o, grave accent - "Oslash" => "O\\*/", # capital O, slash - "oslash" => "o\\*/", # small o, slash - "Otilde" => "O\\*~", # capital O, tilde - "otilde" => "o\\*~", # small o, tilde - "Ouml" => "O\\*:", # capital O, dieresis or umlaut mark - "ouml" => "o\\*:", # small o, dieresis or umlaut mark - "szlig" => '\*8', # small sharp s, German (sz ligature) - "THORN" => '\\*(Th', # capital THORN, Icelandic - "thorn" => '\\*(th',, # small thorn, Icelandic - "Uacute" => "U\\*'", # capital U, acute accent - "uacute" => "u\\*'", # small u, acute accent - "Ucirc" => "U\\*^", # capital U, circumflex accent - "ucirc" => "u\\*^", # small u, circumflex accent - "Ugrave" => "U\\*`", # capital U, grave accent - "ugrave" => "u\\*`", # small u, grave accent - "Uuml" => "U\\*:", # capital U, dieresis or umlaut mark - "uuml" => "u\\*:", # small u, dieresis or umlaut mark - "Yacute" => "Y\\*'", # capital Y, acute accent - "yacute" => "y\\*'", # small y, acute accent - "yuml" => "y\\*:", # small y, dieresis or umlaut mark -); -} +=back + +In addition, some systems use CONFORMING TO to note conformance to relevant +standards and MT-LEVEL to note safeness for use in threaded programs or +signal handlers. These headings are primarily useful when documenting parts +of a C library. Documentation of object-oriented libraries or modules may +use CONSTRUCTORS and METHODS sections for detailed documentation of the +parts of the library and save the DESCRIPTION section for an overview; other +large modules may use FUNCTIONS for similar reasons. Some people use +OVERVIEW to summarize the description if it's quite long. Sometimes there's +an additional COPYRIGHT section at the bottom, for licensing terms. 
+AVAILABILITY is sometimes added, giving the canonical download site for the +software or a URL for updates. + +Section ordering varies, although NAME should I<always> be the first section +(you'll break some man page systems otherwise), and NAME, SYNOPSIS, +DESCRIPTION, and OPTIONS generally always occur first and in that order if +present. In general, SEE ALSO, AUTHOR, and similar material should be left +for last. Some systems also move WARNINGS and NOTES to last. The order +given above should be reasonable for most purposes. + +Finally, as a general note, try not to use an excessive amount of markup. +As documented here and in L<Pod::Man>, you can safely leave Perl variables, +function names, man page references, and the like unadorned by markup and +the POD translators will figure it out for you. This makes it much easier +to later edit the documentation. Note that many existing translators +(including this one currently) will do the wrong thing with e-mail addresses +or URLs when wrapped in LE<lt>E<gt>, so don't do that. + +For additional information that may be more accurate for your specific +system, see either man(5) or man(7) depending on your system manual section +numbering conventions. + +=head1 SEE ALSO + +L<Pod::Man|Pod::Man>, L<Pod::Parser|Pod::Parser>, man(1), nroff(1), +troff(1), man(7) + +The man page documenting the an macro set may be man(5) instead of man(7) on +your system. + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the +original B<pod2man> by Larry Wall and Tom Christiansen. Large portions of +this documentation, particularly the sections on the anatomy of a proper man +page, are taken from the B<pod2man> documentation by Tom. +=cut !NO!SUBS! 
+#'# (cperl-mode) close OUT or die "Can't close $file: $!"; chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; diff --git a/contrib/perl5/pod/pod2text.PL b/contrib/perl5/pod/pod2text.PL index 94516c3..c5460ae 100644 --- a/contrib/perl5/pod/pod2text.PL +++ b/contrib/perl5/pod/pod2text.PL @@ -28,21 +28,200 @@ print "Extracting $file (with variable substitutions)\n"; print OUT <<"!GROK!THIS!"; $Config{startperl} eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' - if \$running_under_some_shell; + if \$running_under_some_shell; !GROK!THIS! # In the following, perl variables are not expanded during extraction. print OUT <<'!NO!SUBS!'; -use Pod::Text; +# pod2text -- Convert POD data to formatted ASCII text. +# +# Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# The driver script for Pod::Text, Pod::Text::Termcap, and Pod::Text::Color, +# invoked by perldoc -t among other things. -if(@ARGV) { - pod2text($ARGV[0]); -} else { - pod2text("<&STDIN"); +require 5.004; + +use Getopt::Long qw(GetOptions); +use Pod::Text (); +use Pod::Usage qw(pod2usage); + +use strict; + +# Take an initial pass through our options, looking for one of the form +# -<number>. We turn that into -w <number> for compatibility with the +# original pod2text script. +for (my $i = 0; $i < @ARGV; $i++) { + last if $ARGV[$i] =~ /^--$/; + if ($ARGV[$i] =~ /^-(\d+)$/) { + splice (@ARGV, $i++, 1, '-w', $1); + } +} + +# Insert -- into @ARGV before any single dash argument to hide it from +# Getopt::Long; we want to interpret it as meaning stdin (which Pod::Parser +# does correctly). +my $stdin; +@ARGV = map { $_ eq '-' && !$stdin++ ? ('--', $_) : $_ } @ARGV; + +# Parse our options. Use the same names as Pod::Text for simplicity, and +# default to sentence boundaries turned off for compatibility. 
+my %options; +$options{sentence} = 0; +Getopt::Long::config ('bundling'); +GetOptions (\%options, 'alt|a', 'color|c', 'help|h', 'indent|i=i', + 'loose|l', 'sentence|s', 'termcap|t', 'width|w=i') or exit 1; +pod2usage (1) if $options{help}; + +# Figure out what formatter we're going to use. -c overrides -t. +my $formatter = 'Pod::Text'; +if ($options{color}) { + $formatter = 'Pod::Text::Color'; + eval { require Term::ANSIColor }; + if ($@) { die "-c (--color) requires Term::ANSIColor be installed\n" } + require Pod::Text::Color; +} elsif ($options{termcap}) { + $formatter = 'Pod::Text::Termcap'; + require Pod::Text::Termcap; } +delete @options{'color', 'termcap'}; + +# Initialize and run the formatter. +my $parser = $formatter->new (%options); +$parser->parse_from_file (@ARGV); + +__END__ + +=head1 NAME + +pod2text - Convert POD data to formatted ASCII text + +=head1 SYNOPSIS + +pod2text [B<-aclst>] [B<-i> I<indent>] [B<-w> I<width>] [I<input> [I<output>]] + +pod2text B<-h> + +=head1 DESCRIPTION + +B<pod2text> is a front-end for Pod::Text and its subclasses. It uses them +to generate formatted ASCII text from POD source. It can optionally use +either termcap sequences or ANSI color escape sequences to format the text. + +I<input> is the file to read for POD source (the POD can be embedded in +code). If I<input> isn't given, it defaults to STDIN. I<output>, if given, +is the file to which to write the formatted output. If I<output> isn't +given, the formatted output is written to STDOUT. + +=head1 OPTIONS + +=over 4 + +=item B<-a>, B<--alt> + +Use an alternate output format that, among other things, uses a different +heading style and marks C<=item> entries with a colon in the left margin. + +=item B<-c>, B<--color> + +Format the output with ANSI color escape sequences. Using this option +requires that Term::ANSIColor be installed on your system. 
+ +=item B<-i> I<indent>, B<--indent=>I<indent> + +Set the number of spaces to indent regular text, and the default indentation +for C<=over> blocks. Defaults to 4 spaces if this option isn't given. + +=item B<-h>, B<--help> + +Print out usage information and exit. + +=item B<-l>, B<--loose> + +Print a blank line after a C<=head1> heading. Normally, no blank line is +printed after C<=head1>, although one is still printed after C<=head2>, +because this is the expected formatting for manual pages; if you're +formatting arbitrary text documents, using this option is recommended. + +=item B<-s>, B<--sentence> + +Assume each sentence ends with two spaces and try to preserve that spacing. +Without this option, all consecutive whitespace in non-verbatim paragraphs +is compressed into a single space. + +=item B<-t>, B<--termcap> + +Try to determine the width of the screen and the bold and underline +sequences for the terminal from termcap, and use that information in +formatting the output. Output will be wrapped at two columns less than the +width of your terminal device. Using this option requires that your system +have a termcap file somewhere where Term::Cap can find it and requires that +your system support termios. With this option, the output of B<pod2text> +will contain terminal control sequences for your current terminal type. + +=item B<-w>, B<--width=>I<width>, B<->I<width> + +The column at which to wrap text on the right-hand side. Defaults to 76, +unless B<-t> is given, in which case it's two columns less than the width of +your terminal device. + +=back + +=head1 DIAGNOSTICS + +If B<pod2text> fails with errors, see L<Pod::Text> and L<Pod::Parser> for +information about what those errors might mean. Internally, it can also +produce the following diagnostics: + +=over 4 + +=item -c (--color) requires Term::ANSIColor be installed + +(F) B<-c> or B<--color> were given, but Term::ANSIColor could not be +loaded. 
+ +=item Unknown option: %s + +(F) An unknown command line option was given. + +=back + +In addition, other L<Getopt::Long|Getopt::Long> error messages may result +from invalid command-line options. + +=head1 ENVIRONMENT + +=over 4 + +=item COLUMNS + +If B<-t> is given, B<pod2text> will take the current width of your screen +from this environment variable, if available. It overrides terminal width +information in TERMCAP. + +=item TERMCAP + +If B<-t> is given, B<pod2text> will use the contents of this environment +variable if available to determine the correct formatting sequences for your +current terminal device. + +=back + +=head1 SEE ALSO + +L<Pod::Text|Pod::Text>, L<Pod::Text::Color|Pod::Text::Color>, +L<Pod::Text::Termcap|Pod::Text::Termcap>, L<Pod::Parser|Pod::Parser> + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>. +=cut !NO!SUBS! close OUT or die "Can't close $file: $!"; diff --git a/contrib/perl5/pod/pod2usage.PL b/contrib/perl5/pod/pod2usage.PL new file mode 100644 index 0000000..e0f70b2 --- /dev/null +++ b/contrib/perl5/pod/pod2usage.PL @@ -0,0 +1,179 @@ +#!/usr/local/bin/perl + +use Config; +use File::Basename qw(&basename &dirname); +use Cwd; + +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. + +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. 
+$origdir = cwd; +chdir(dirname($0)); +($file = basename($0)) =~ s/\.PL$//; +$file =~ s/\.pl$// if ($^O eq 'os2' or $^O eq 'dos'); # "case-forgiving" +$file =~ s/\.pl$/.com/ if ($^O eq 'VMS'); # "case-forgiving" + +open OUT,">$file" or die "Can't create $file: $!"; + +print "Extracting $file (with variable substitutions)\n"; + +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. + +print OUT <<"!GROK!THIS!"; +$Config{'startperl'} + eval 'exec perl -S \$0 "\$@"' + if 0; +!GROK!THIS! + +# In the following, perl variables are not expanded during extraction. + +print OUT <<'!NO!SUBS!'; + +############################################################################# +# pod2usage -- command to print usage messages from embedded pod docs +# +# Copyright (c) 1996-1999 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +use strict; +use diagnostics; + +=head1 NAME + +pod2usage - print usage messages from embedded pod docs in files + +=head1 SYNOPSIS + +=over 12 + +=item B<pod2usage> + +[B<-help>] +[B<-man>] +[B<-exit>S< >I<exitval>] +[B<-output>S< >I<outfile>] +[B<-verbose> I<level>] +[B<-pathlist> I<dirlist>] +I<file> + +=back + +=head1 OPTIONS AND ARGUMENTS + +=over 8 + +=item B<-help> + +Print a brief help message and exit. + +=item B<-man> + +Print this command's manual page and exit. + +=item B<-exit> I<exitval> + +The exit status value to return. + +=item B<-output> I<outfile> + +The output file to print to. If the special names "-" or ">&1" or ">&STDOUT" +are used then standard output is used. If ">&2" or ">&STDERR" is used then +standard error is used. 
+ +=item B<-verbose> I<level> + +The desired level of verbosity to use: + + 1 : print SYNOPSIS only + 2 : print SYNOPSIS sections and any OPTIONS/ARGUMENTS sections + 3 : print the entire manpage (similar to running pod2text) + +=item B<-pathlist> I<dirlist> + +Specifies one or more directories to search for the input file if it +was not supplied with an absolute path. Each directory path in the given +list should be separated by a ':' on Unix (';' on MSWin32 and DOS). + +=item I<file> + +The pathname of a file containing pod documentation to be output in +usage message format (defaults to standard input). + +=back + +=head1 DESCRIPTION + +B<pod2usage> will read the given input file looking for pod +documentation and will print the corresponding usage message. +If no input file is specified then standard input is read. + +B<pod2usage> invokes the B<pod2usage()> function in the B<Pod::Usage> +module. Please see L<Pod::Usage/pod2usage()>. + +=head1 SEE ALSO + +L<Pod::Usage>, L<pod2text(1)> + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<pod2text(1)> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + +use Pod::Usage; +use Getopt::Long; + +## Define options +my %options = (); +my @opt_specs = ( + "help", + "man", + "exit=i", + "output=s", + "pathlist=s", + "verbose=i", +); + +## Parse options +GetOptions(\%options, @opt_specs) || pod2usage(2); +pod2usage(1) if ($options{help}); +pod2usage(VERBOSE => 2) if ($options{man}); + +## Dont default to STDIN if connected to a terminal +pod2usage(2) if ((@ARGV == 0) && (-t STDIN)); + +@ARGV = ("-") unless (@ARGV > 0); +if (@ARGV > 1) { + print STDERR "pod2usage: Too many filenames given\n\n"; + pod2usage(2); +} + +my %usage = (); +$usage{-input} = shift(@ARGV); +$usage{-exitval} = $options{"exit"} if (defined $options{"exit"}); +$usage{-output} = $options{"output"} if (defined $options{"output"}); +$usage{-verbose} = $options{"verbose"} if (defined $options{"verbose"}); 
+$usage{-pathlist} = $options{"pathlist"} if (defined $options{"pathlist"}); + +pod2usage(\%usage); + + +!NO!SUBS! + +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; +chdir $origdir; diff --git a/contrib/perl5/pod/podchecker.PL b/contrib/perl5/pod/podchecker.PL new file mode 100644 index 0000000..a7f9643 --- /dev/null +++ b/contrib/perl5/pod/podchecker.PL @@ -0,0 +1,162 @@ +#!/usr/local/bin/perl + +use Config; +use File::Basename qw(&basename &dirname); +use Cwd; + +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. + +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. +$origdir = cwd; +chdir(dirname($0)); +($file = basename($0)) =~ s/\.PL$//; +$file =~ s/\.pl$// + if ($^O eq 'VMS' or $^O eq 'os2' or $^O eq 'dos'); # "case-forgiving" +$file .= '.com' if $^O eq 'VMS'; + +open OUT,">$file" or die "Can't create $file: $!"; + +print "Extracting $file (with variable substitutions)\n"; + +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. + +print OUT <<"!GROK!THIS!"; +$Config{'startperl'} + eval 'exec perl -S \$0 "\$@"' + if 0; +!GROK!THIS! + +# In the following, perl variables are not expanded during extraction. + +print OUT <<'!NO!SUBS!'; +############################################################################# +# podchecker -- command to invoke the podchecker function in Pod::Checker +# +# Copyright (c) 1998-1999 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". 
PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +use strict; +#use diagnostics; + +=head1 NAME + +podchecker - check the syntax of POD format documentation files + +=head1 SYNOPSIS + +B<podchecker> [B<-help>] [B<-man>] [B<-(no)warnings>] [I<file>S< >...] + +=head1 OPTIONS AND ARGUMENTS + +=over 8 + +=item B<-help> + +Print a brief help message and exit. + +=item B<-man> + +Print the manual page and exit. + +=item B<-warnings> B<-nowarnings> + +Turn on/off printing of warnings. + +=item I<file> + +The pathname of a POD file to syntax-check (defaults to standard input). + +=back + +=head1 DESCRIPTION + +B<podchecker> will read the given input files looking for POD +syntax errors in the POD documentation and will print any errors +it finds to STDERR. At the end, it will print a status message +indicating the number of errors found. + +B<podchecker> invokes the B<podchecker()> function exported by B<Pod::Checker>. +Please see L<Pod::Checker/podchecker()> for more details. + +=head1 RETURN VALUE + +B<podchecker> returns a 0 (zero) exit status if all specified +POD files are ok. + +=head1 ERRORS + +B<podchecker> returns the exit status 1 if at least one of +the given POD files has syntax errors. + +The status 2 indicates that at least one of the specified +files does not contain I<any> POD commands. + +Status 1 overrides status 2. If you want unambiguous +results, call B<podchecker> with one single argument only. 
+ +=head1 SEE ALSO + +L<Pod::Parser> and L<Pod::Checker> + +=head1 AUTHORS + +Brad Appleton E<lt>bradapp@enteract.comE<gt>, +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt> + +Based on code for B<Pod::Text::pod2text(1)> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + + +use Pod::Checker; +use Pod::Usage; +use Getopt::Long; + +## Define options +my %options = ( + "help" => 0, + "man" => 0, + "warnings" => 1, +); + +## Parse options +GetOptions(\%options, "help", "man", "warnings!") || pod2usage(2); +pod2usage(1) if ($options{help}); +pod2usage(-verbose => 2) if ($options{man}); + +## Dont default to STDIN if connected to a terminal +pod2usage(2) if ((@ARGV == 0) && (-t STDIN)); + +## Invoke podchecker() +my $status = 0; +@ARGV = ("<&STDIN") unless(@ARGV); +for (@ARGV) { + my $s = podchecker($_, undef, '-warnings' => $options{warnings}); + if($s > 0) { + # errors occurred + $status = 1; + } + elsif($s < 0) { + # no pod found + $status = 2 unless($status); + } +} +exit $status; + +!NO!SUBS! + +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; +chdir $origdir; diff --git a/contrib/perl5/pod/podselect.PL b/contrib/perl5/pod/podselect.PL new file mode 100644 index 0000000..f2ba80a --- /dev/null +++ b/contrib/perl5/pod/podselect.PL @@ -0,0 +1,142 @@ +#!/usr/local/bin/perl + +use Config; +use File::Basename qw(&basename &dirname); +use Cwd; + +# List explicitly here the variables you want Configure to +# generate. Metaconfig only looks for shell variables, so you +# have to mention them as if they were shell variables, not +# %Config entries. Thus you write +# $startperl +# to ensure Configure will look for $Config{startperl}. + +# This forces PL files to create target in same directory as PL file. +# This is so that make depend always knows where to find PL derivatives. 
+$origdir = cwd; +chdir(dirname($0)); +($file = basename($0)) =~ s/\.PL$//; +$file =~ s/\.pl$// if ($^O eq 'os2' or $^O eq 'dos'); # "case-forgiving" +$file =~ s/\.pl$/.com/ if ($^O eq 'VMS'); # "case-forgiving" + +open OUT,">$file" or die "Can't create $file: $!"; + +print "Extracting $file (with variable substitutions)\n"; + +# In this section, perl variables will be expanded during extraction. +# You can use $Config{...} to use Configure variables. + +print OUT <<"!GROK!THIS!"; +$Config{'startperl'} + eval 'exec perl -S \$0 "\$@"' + if 0; +!GROK!THIS! + +# In the following, perl variables are not expanded during extraction. + +print OUT <<'!NO!SUBS!'; + +############################################################################# +# podselect -- command to invoke the podselect function in Pod::Select +# +# Copyright (c) 1996-1999 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +use strict; +use diagnostics; + +=head1 NAME + +podselect - print selected sections of pod documentation on standard output + +=head1 SYNOPSIS + +B<podselect> [B<-help>] [B<-man>] [B<-section>S< >I<section-spec>] +[I<file>S< >...] + +=head1 OPTIONS AND ARGUMENTS + +=over 8 + +=item B<-help> + +Print a brief help message and exit. + +=item B<-man> + +Print the manual page and exit. + +=item B<-section>S< >I<section-spec> + +Specify a section to include in the output. +See L<Pod::Parser/"SECTION SPECIFICATIONS"> +for the format to use for I<section-spec>. +This option may be given multiple times on the command line. + +=item I<file> + +The pathname of a file from which to select sections of pod +documentation (defaults to standard input). 
+ +=back + +=head1 DESCRIPTION + +B<podselect> will read the given input files looking for pod +documentation and will print out (in raw pod format) all sections that +match one or more of the given section specifications. If no section +specifications are given then all pod sections encountered are output. + +B<podselect> invokes the B<podselect()> function exported by B<Pod::Select>. +Please see L<Pod::Select/podselect()> for more details. + +=head1 SEE ALSO + +L<Pod::Parser> and L<Pod::Select> + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<Pod::Text::pod2text(1)> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + +use Pod::Select; +use Pod::Usage; +use Getopt::Long; + +## Define options +my %options = ( + "help" => 0, + "man" => 0, + "sections" => [], +); + +## Parse options +GetOptions(\%options, "help", "man", "sections|select=s@") || pod2usage(2); +pod2usage(1) if ($options{help}); +pod2usage(-verbose => 2) if ($options{man}); + +## Dont default to STDIN if connected to a terminal +pod2usage(2) if ((@ARGV == 0) && (-t STDIN)); + +## Invoke podselect(). +if (@{ $options{"sections"} } > 0) { + podselect({ -sections => $options{"sections"} }, @ARGV); +} +else { + podselect(@ARGV); +} + + +!NO!SUBS! 
+ +close OUT or die "Can't close $file: $!"; +chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; +exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; +chdir $origdir; diff --git a/contrib/perl5/pod/roffitall b/contrib/perl5/pod/roffitall index 9ab7f29..018c0b3 100644 --- a/contrib/perl5/pod/roffitall +++ b/contrib/perl5/pod/roffitall @@ -36,24 +36,29 @@ toroff=` $mandir/perlre.1 \ $mandir/perlrun.1 \ $mandir/perlfunc.1 \ - $mandir/perlopentut.1 \ $mandir/perlvar.1 \ $mandir/perlsub.1 \ + $mandir/perlopentut.1 \ $mandir/perlmod.1 \ $mandir/perlmodlib.1 \ $mandir/perlmodinstall.1 \ + $mandir/perlfork.1 \ $mandir/perlform.1 \ $mandir/perllocale.1 \ $mandir/perlref.1 \ $mandir/perlreftut.1 \ $mandir/perldsc.1 \ $mandir/perllol.1 \ + $mandir/perlboot.1 \ $mandir/perltoot.1 \ $mandir/perlobj.1 \ $mandir/perltie.1 \ $mandir/perlbot.1 \ $mandir/perlipc.1 \ + $mandir/perlthrtut.1 \ + $mandir/perldebguts.1 \ $mandir/perldebug.1 \ + $mandir/perlnumber.1 \ $mandir/perldiag.1 \ $mandir/perlsec.1 \ $mandir/perltrap.1 \ @@ -67,10 +72,15 @@ toroff=` $mandir/perlxstut.1 \ $mandir/perlguts.1 \ $mandir/perlcall.1 \ - $mandir/perlthrtut.1 \ + $mandir/perlcompile.1 \ + $mandir/perltodo.1 \ + $mandir/perlapi.1 \ + $mandir/perlintern.1 \ + $mandir/perlhack.1 \ $mandir/perlhist.1 \ $mandir/perldelta.1 \ $mandir/perl5004delta.1 \ + $mandir/perl5005delta.1 \ $mandir/perlfaq.1 \ $mandir/perlfaq1.1 \ $mandir/perlfaq2.1 \ @@ -84,6 +94,7 @@ toroff=` \ $mandir/a2p.1 \ $mandir/c2ph.1 \ + $mandir/dprofpp.1 \ $mandir/h2ph.1 \ $mandir/h2xs.1 \ $mandir/perlbug.1 \ @@ -102,6 +113,7 @@ toroff=` $libdir/constant.3 \ $libdir/diagnostics.3 \ $libdir/fields.3 \ + $libdir/filetest.3 \ $libdir/integer.3 \ $libdir/less.3 \ $libdir/lib.3 \ |