diff options
author | jkh <jkh@FreeBSD.org> | 1993-06-18 04:22:21 +0000 |
---|---|---|
committer | jkh <jkh@FreeBSD.org> | 1993-06-18 04:22:21 +0000 |
commit | 777d9e83b8466b1892a1383f2540a711b9107887 (patch) | |
tree | d52de9deedffb757f9283a79e16e1c1c47aba8f9 | |
parent | 25062ba061871945759b3baa833fe64969383e40 (diff) | |
download | FreeBSD-src-777d9e83b8466b1892a1383f2540a711b9107887.zip FreeBSD-src-777d9e83b8466b1892a1383f2540a711b9107887.tar.gz |
Updated GNU utilities
64 files changed, 33137 insertions, 0 deletions
diff --git a/gnu/COPYING b/gnu/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/gnu/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/gnu/Makefile b/gnu/Makefile new file mode 100644 index 0000000..7da2c32 --- /dev/null +++ b/gnu/Makefile @@ -0,0 +1,5 @@ +# @(#)Makefile 5.33.1.1 (Berkeley) 5/6/91 + +SUBDIR= gawk groff tar + +.include <bsd.subdir.mk> diff --git a/gnu/usr.bin/awk/ACKNOWLEDGMENT b/gnu/usr.bin/awk/ACKNOWLEDGMENT new file mode 100644 index 0000000..b6c3b0b --- /dev/null +++ b/gnu/usr.bin/awk/ACKNOWLEDGMENT @@ -0,0 +1,21 @@ +The current developers of Gawk would like to thank and acknowledge the +many people who have contributed to the development through bug reports +and fixes and suggestions. Unfortunately, we have not been organized +enough to keep track of all the names -- for that we apologize. + +Another group of people have assisted even more by porting Gawk to new +platforms and providing a great deal of feedback. They are: + + Hal Peterson <hrp@pecan.cray.com> (Cray) + Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS) + Michal Jaegermann <NTOMCZAK@vm.ucs.UAlberta.CA> (Atari, NeXT, DEC 3100) + Mike Lijewski <mjlx@eagle.cnsf.cornell.edu> (IBM RS6000) + Scott Deifik <scottd@amgen.com> (MSDOS 2.14) + Kent Williams (MSDOS 2.11) + Conrad Kwok (MSDOS earlier versions) + Scott Garfinkle (MSDOS earlier versions) + +Last, but far from least, we would like to thank Brian Kernighan who +has helped to clear up many dark corners of the language and provided a +restraining touch when we have been overly tempted by "feeping +creaturism". 
diff --git a/gnu/usr.bin/awk/COPYING b/gnu/usr.bin/awk/COPYING new file mode 100644 index 0000000..3358a7b --- /dev/null +++ b/gnu/usr.bin/awk/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. + diff --git a/gnu/usr.bin/awk/FUTURES b/gnu/usr.bin/awk/FUTURES new file mode 100644 index 0000000..b096560 --- /dev/null +++ b/gnu/usr.bin/awk/FUTURES @@ -0,0 +1,120 @@ +This file lists future projects and enhancements for gawk. Items are listed +in roughly the order they will be done for a given release. This file is +mainly for use by the developers to help keep themselves on track, please +don't bug us too much about schedules or what all this really means. + +For 2.16 +======== +David: + Move to autoconf-based configure system. + + Allow RS to be a regexp. + + RT variable to hold text of record terminator + + RECLEN variable for fixed length records + + Feedback alloca.s changes to FSF + + Extensible hashing in memory of awk arrays + + Split() with null string as third arg to split up strings + + Analogously, setting FS="" would split the input record into individual + characters. + +Arnold: + Generalize IGNORECASE + any value makes it work, not just numeric non-zero + make it apply to *all* string comparisons + + Fix FILENAME to have an initial value of "", not "-" + + Clean up code by isolating system-specific functions in separate files. + + Undertake significant directory reorganization. + + Extensive manual cleanup: + Use of texinfo 2.0 features + Lots more examples + Document all of the above. 
+ +In 2.17 +======= +David: + + Incorporate newer dfa.c and regex.c (go to POSIX regexps) + + Make regex + dfa less dependent on gawk header file includes + + General sub functions: + edit(line, pat, sub) and gedit(line, pat, sub) + that return the substituted strings and allow \1 etc. in the sub string. + +Arnold: + DBM storage of awk arrays. Try to allow multiple dbm packages + + ? Have strftime() pay attention to the value of ENVIRON["TZ"] + + Additional manual features: + Document posix regexps + Document use of dbm arrays + ? Add an error messages section to the manual + ? A section on where gawk is bounded + regex + i/o + sun fp conversions + +For 2.18 +======== + +Arnold: + Add chdir and stat built-in functions. + + Add function pointers as valid variable types. + + Add an `ftw' built-in function that takes a function pointer. + +David: + + Do an optimization pass over parse tree? + +For 2.19 or later: +================== +Add variables similar to C's __FILE__ and __LINE__ for better diagnostics +from within awk programs. + +Add an explicit concatenation operator and assignment version. + +? Add a switch statement + +Add the ability to seek on an open file and retrieve the current file position. + +Add lint checking everywhere, including check for use of builtin vars. +only in new awk. + +"restart" keyword + +Add |& + +Make awk '/foo/' files... run at egrep speeds + +Do a reference card + +Allow OFMT to be other than a floating point format. + +Allow redefining of builtin functions? + +Make it faster and smaller. + +For 3.x: +======== + +Create a gawk compiler? + +Create a gawk-to-C translator? (or C++??) + +Provide awk profiling and debugging. + + + diff --git a/gnu/usr.bin/awk/LIMITATIONS b/gnu/usr.bin/awk/LIMITATIONS new file mode 100644 index 0000000..5877197 --- /dev/null +++ b/gnu/usr.bin/awk/LIMITATIONS @@ -0,0 +1,14 @@ +This file describes limits of gawk on a Unix system (although it +is variable even then). Non-Unix systems may have other limits.
+ +# of fields in a record: MAX_INT +Length of input record: MAX_INT +Length of output record: unlimited +Size of a field: MAX_INT +Size of a printf string: MAX_INT +Size of a literal string: MAX_INT +Characters in a character class: 2^(# of bits per byte) +# of file redirections: unlimited +# of pipe redirections: min(# of processes per user, # of open files) +double-precision floating point +Length of source line: unlimited diff --git a/gnu/usr.bin/awk/Makefile b/gnu/usr.bin/awk/Makefile new file mode 100644 index 0000000..fdca82c --- /dev/null +++ b/gnu/usr.bin/awk/Makefile @@ -0,0 +1,13 @@ +PROG= awk +SRCS= main.c eval.c builtin.c msg.c iop.c io.c field.c array.c \ + node.c version.c re.c awk.c regex.c dfa.c \ + getopt.c getopt1.c +CFLAGS+= -DGAWK +LDADD= -lm +DPADD= ${LIBM} +CLEANFILES+= awk.c y.tab.h + +MAN1= awk.0 + +.include <bsd.prog.mk> +.include "../../usr.bin/Makefile.inc" diff --git a/gnu/usr.bin/awk/NEWS b/gnu/usr.bin/awk/NEWS new file mode 100644 index 0000000..6711373 --- /dev/null +++ b/gnu/usr.bin/awk/NEWS @@ -0,0 +1,1295 @@ +Changes from 2.15.1 to 2.15.2 +--------------------------- + +Additions to the FUTURES file. + +Document undefined order of output when using both standard output + and /dev/stdout or any of the /dev output files that gawk emulates in + the absence of OS support. + +Clean up the distribution generation in Makefile.in: the info files are + now included, the distributed files are marked read-only and patched + distributions are now unpacked in a directory named with the patch level. + + +Changes from 2.15 to 2.15.1 +--------------------------- + +Close stdout and stderr before all redirections on program exit. This allows + detection of write errors and also fixes the messages test on Solaris 2.x. + +Removed YYMAXDEPTH define in awk.y which was limiting the parser stack depth. + +Changes to config/bsd44, Makefile.bsd44 and configure to bring it into line + with the BSD4.4 release. 
+ +Changed Makefile to use prefix, exec_prefix, bindir etc. + +make install now installs info files. + +make install now sets permissions on installed files. + +Make targets added: uninstall, distclean, mostlyclean and realclean. + +Added config.h to cleaner and clobber make targets. + +Changes to config/{hpux8x,sysv3,sysv4,ultrix41} to deal with alloca(). + +Change to getopt.h for portability. + +Added more special cases to the getpgrp() call. + +Added README.ibmrt-aos and config/ibmrt-aos. + +Changes from 2.14 to 2.15 +--------------------------- + +Command-line source can now be mixed with library functions. + +ARGIND variable tracks index in ARGV of FILENAME. + +GNU style long options in addition to short options. + +Plan 9 style special files interpreted by gawk: + /dev/pid + /dev/ppid + /dev/pgrpid + /dev/user + $1 = getuid + $2 = geteuid + $3 = getgid + $4 = getegid + $5 ... $NF = getgroups if supported + +ERRNO variable contains error string if getline or close fails. + +Very old options -a and -e have gone away. + +Inftest has been removed from the default target in test/Makefile -- the + results were too machine specific and resulted in too many false alarms. + +A README.amiga has been added. + +The "too many arguments supplied for format string" warning message is only + in effect under the lint option. + +Code improvements in dfa.c. + +Fixed all reported bugs: + + Writes are checked for failure (such as full filesystem). + + Stopped (at least some) runaway error messages. + + gsub(/^/, "x") does the right thing for $0 of 0, 1, or more length. + + close() on a command being piped to a getline now works properly. + + The input record will no longer be freed upon an explicit close() + of the input file. + + A NUL character in FS now works. + + In a substitute, \\& now means a literal backslash followed by what + was matched. + + Integer overflow of substring length in substr() is caught. 
+ + An input record without a newline termination is handled properly. + + In io.c, check is against only EMFILE so that system file table + is not filled. + + Renamed all files with names longer than 14 characters. + + Escaped characters in regular expressions were being lost when + IGNORECASE was used. + + Long source lines were not being handled properly. + + Sourcefiles that ended in a tab but no newline were bombing. + + Patterns that could match zero characters in split() were not working + properly. + + The parsedebug option was not working. + + The grammar was being a bit too lenient, allowing some very dubious + programs to pass. + + Compilation with DEBUG defined now works. + + A variable read in with getline was not being treated as a potential + number. + + Array subscripts were not always of string type. + + +Changes from 2.13.2 to 2.14 +--------------------------- + +Updated manual! + +Added "next file" to skip efficiently to the next input file. + +Fixed potential of overflowing buffer in do_sprintf(). + +Plugged small memory leak in sub_common(). + +EOF on a redirect is now "sticky" -- it can only be cleared by close()ing + the pipe or file. + +Now works if used via a #! /bin/gawk line at the top of an executable file + when that line ends with whitespace. + +Added some checks to the grammar to catch redefinition of builtin functions. + This could eventually be the basis for an extension to allow redefining + functions, but in the mean time it's a good error catching facility. + +Negative integer exponents now work. + +Modified do_system() to make sure it had a non-null string to be passed + to system(3). Thus, system("") will flush any pending output but not go + through the overhead of forking an un-needed shell. + +A fix to floating point comparisons so that NaNs compare right on IEEE systems. + +Added code to make sure we're not opening directories for reading and such. + +Added code to do better diagnoses of weird or null file names. 
+ +Allow continue outside of a loop, unless in strict posix mode. Lint option + will issue warning. + +New missing/strftime.c. There has been one change that affects gawk. Posix + now defines a %V conversion so the vms conversion has been changed to %v. + If this version is used with gawk -Wlint and they use %V in a call to + strftime, they'll get a warning. + +Error messages now conform to GNU standard (I hope). + +Changed comparisons to conform to the description found in the file POSIX. + This is inconsistent with the current POSIX draft, but that is broken. + Hopefully the final POSIX standard will conform to this version. + (Alas, this will have to wait for 1003.2b, which will be a revision to + the 1003.2 standard. That standard has been frozen with the broken + comparison rules.) + +The length of a string was a short and now is a size_t. + +Updated VMS help. + +Added quite a few new tests to the test suite and deleted many due to lack of + written releases. Test output is only removed if it is identical to the + "good" output. + +Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in + a BEGIN block. + +Fixed premature freeing in construct "$0 = $0". + +Removed the call to wait_any() in gawk_popen(), since on at least some systems, + if gawk's input was from a pipe, the predecessor process in the pipe was a + child of gawk and this caused a deadlock. + +Regexp can (once again) match a newline, if given explicitly. + +nextopen() makes sure file name is null terminated. + +Fixed VMS pipe simulation. Improved VMS I/O performance. + +Catch . used in variable names. + +Fixed bug in getline without redirect from a file -- it was quitting after the + first EOF, rather than trying the next file. + +Fixed bug in treatment of backslash at the end of a string -- it was bombing + rather than doing something sensible. It is not clear what this should mean, + but for now I issue a warning and take it as a literal backslash.
+ +Moved setting of regexp syntax to before the option parsing in main(), to + handle things like -v FS='[.,;]' + +Fixed bug when NF is set by user -- fields_arr must be expanded if necessary + and "new" fields must be initialized. + +Fixed several bugs in [g]sub() for no match found or the match is 0-length. + +Fixed bug where in gsub() a pattern anchored at the beginning would still + substitute throughout the string. + +make test does not assume the . is in PATH. + +Fixed bug when a field beyond the end of the record was requested after + $0 was altered (directly or indirectly). + +Fixed bug for assignment to field beyond end of record -- the assigned value + was not found on subsequent reference to that field. + +Fixed bug for FS a regexp and it matches at the end of a record. + +Fixed memory leak for an array local to a function. + +Fixed hanging of pipe redirection to getline + +Fixed coredump on access to $0 inside BEGIN block. + +Fixed treatment of RS = "". It now parses the fields correctly and strips + leading whitespace from a record if FS is a space. + +Fixed faking of /dev/stdin. + +Fixed problem with x += x + +Use of scalar as array and vice versa is now detected. + +IGNORECASE now obeyed for FS (even if FS is a single alphabetic character). + +Switch to GPL version 2. + +Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs. + +Renamed this file (CHANGES) to NEWS. + +Use fmod() instead of modf() and provide FMOD_MISSING #define to undo + this change. + +Correct the volatile declarations in eval.c. + +Avoid errant closing of the file descriptors for stdin, stdout and stderr. + +Be more flexible about where semi-colons can occur in programs. + +Check for write errors on all output, not just on close(). + +Eliminate the need for missing/{strtol.c,vprintf.c}. + +Use GNU getopt and eliminate missing/getopt.c. + +More "lint" checking.
+ + +Changes from 2.13.1 to 2.13.2 +----------------------------- + +Toward conformity with GNU standards, configure is a link to mkconf, the latter + to disappear in the next major release. + +Update to config/bsd43. + +Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2 + +sgi33.cc added for compilation using cc rather than gcc. + +Ultrix41 now propagates to config.h properly -- as part of a general + mechanism in configure for kludges -- #define anything from a config file + just gets tacked onto the end of config.h -- to be used sparingly. + +Got rid of an unnecessary and troublesome declaration of vprintf(). + +Small improvement in locality of error messages. + +Try to diagnose use of array as scalar and vice versa -- to be improved in + the future. + +Fix for last bug fix for Cray division code--sigh. + +More changes to test suite to explicitly use sh. Also get rid of + a few generated files. + +Fixed off-by-one bug in string concatenation code. + +Fix for use of array that is passed in from a previous function parameter. + Addition to test suite for above. + +A number of changes associated with changing NF and access to fields + beyond the end of the current record. + +Change to missing/memcmp.c to avoid seg. fault on zero length input. + +Updates to test suite (including some inadvertently left out of the last patch) + to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some + junk files. test/chem/good updated to correspond to bug fixes. + +Changes from 2.13.0 to 2.13.1 +----------------------------- + +More configs and PORTS. + +Fixed bug wherein a simple division produced an erroneous FPE, caused by + the Cray division workaround -- that code is now #ifdef'd only for + Cray *and* fixed. + +Fixed bug in modulus implementation -- it was very close to the above + code, so I noticed it.
+ +Fixed portability problem with limits.h in missing.c + +Fixed portability problem with tzname and daylight -- define TZNAME_MISSING + if strftime() is missing and tzname is also. + +Better support for Latin-1 character set. + +Fixed portability problem in test Makefile. + +Updated PROBLEMS file. + +=============================== gawk-2.13 released ========================= +Changes from 2.12.42 to 2.12.43 +------------------------------- + +Typo in awk.y + +Fixed up strftime.3 and added doc. for %V. + +Changes from 2.12.41 to 2.12.42 +------------------------------- + +Fixed bug in devopen() -- if you had write permission in /dev, + it would just create /dev/stdout etc.!! + +Final (?) VMS update. + +Make NeXT use GFMT_WORKAROUND + +Fixed bug in sub_common() for substitute on zero-length match. Improved the + code a bit while I was at it. + +Fixed grammar so that $i++ parses as ($i)++ + +Put support/* back in the distribution (didn't I already do this?!) + +Changes from 2.12.40 to 2.12.41 +------------------------------- + +VMS workaround for broken %g format. + +Changes from 2.12.39 to 2.12.40 +------------------------------- + +Minor man page update. + +Fixed latent bug in redirect(). + +Changes from 2.12.38 to 2.12.39 +------------------------------- + +Updates to test suite -- remove dependence on changing gawk.1 man page. + +Changes from 2.12.37 to 2.12.38 +------------------------------- + +Fixed bug in use of *= without whitespace following. + +VMS update. + +Updates to man page. + +Option handling updates in main.c + +test/manyfiles redone and added to bigtest. + +Fixed latent (on Sun) bug in handling of save_fs. + +Changes from 2.12.36 to 2.12.37 +------------------------------- + +Update REL in Makefile-dist. Incorporate test suite into main distribution. + +Minor fix in regtest. 
+ +Changes from 2.12.35 to 2.12.36 +------------------------------- + +Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further + patches before public release won't count for 2.13, although they will for + 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing + to change. + +Cray updates to deal with arithmetic problems. + +Minor test suite updates. + +Fixed latent bug in parser (freeing memory). + +Changes from 2.12.34 to 2.12.35 +------------------------------- + +VMS updates. + +Flush stdout at top of err() and stderr at bottom. + +Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and + doing the force_number(). + +Included the missing manyfiles.awk and a new test to catch the above bug which + I am amazed wasn't already caught by the test suite -- it's pretty basic. + +Changes from 2.12.33 to 2.12.34 +------------------------------- + +Atari updates -- including bug fix. + +More VMS updates -- also nuke vms/version.com. + +Fixed bug in handling of large numbers of redirections -- it was probably never + tested before (blush!). + +Minor rearrangement of code in r_force_number(). + +Made chem and regtest tests a bit more portable (Ultrix again). + +Added another test -- manyfiles -- not invoked under any other test -- very Unix + specific. + +Rough beginning of LIMITATIONS file -- need my AWK book to complete it. + +Changes from 2.12.32 to 2.12.33 +------------------------------- + +Expunge debug.? from various files. + +Remove vestiges of Floor and Ceil kludge. + +Special case integer division -- mainly for Cray, but maybe someone else + will benefit. + +Workaround for iop_close closing an output pipe descriptor on Cray -- + not conditional since I think it may fix a bug on SGI as well and I don't + think it can hurt elsewhere. + +Fixed memory leak in assoc_lookup(). + +Small cleanup in test suite. 
+ +Changes from 2.12.31 to 2.12.32 +------------------------------- + +Nuked debug.c and debugging flag -- there are better ways. + +Nuked version.sh and version.c in subdirectories. + +Fixed bug in handling of IGNORECASE. + +Fixed bug when FIELDWIDTHS was set via -v option. + +Fixed (obscure) bug when $0 is assigned a numerical value. + +Fixed so that escape sequences in command-line assignments work (as it already + said in the comment). + +Added a few cases to test suite. + +Moved support/* back into distribution. + +VMS updates. + +Changes from 2.12.30 to 2.12.31 +------------------------------- + +Cosmetic manual page changes. + +Updated sunos3 config. + +Small changes in test suite including renaming files over 14 chars. in length. + +Changes from 2.12.29 to 2.12.30 +------------------------------- + +Bug fix for many string concatenations in a row. + +Changes from 2.12.28 to 2.12.29 +------------------------------- + +Minor cleanup in awk.y + +Minor VMS update. + +Minor atari update. + +Changes from 2.12.27 to 2.12.28 +------------------------------- + +Got rid of the debugging goop in eval.c -- there are better ways. + +Sequent port. + +VMS changes left out of the last patch -- sigh! config/vms.h renamed + to config/vms-conf.h. + +Fixed missing/tzset.c + +Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than + sprintf("%g") and caused all sorts of portability headaches. + +Tuned get_field() -- it was unnecessarily parsing the whole record on reference + to $0. + +Tuned interpret() a bit in the rule_node loop. + +In r_force_number(), worked around bug in Uglix strtod() and got rid of + ugly do{}while(0) at Michal's urging. + +Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster. + +Got rid of assign_number() -- contrary to comment, it was no faster than + just making a new node and freeing the old one. + +Replaced make_number() and tmp_number() with macros that call mk_number(). 
+ +Changed freenode() and newnode() into macros -- the latter is getnode() + which calls more_nodes() as necessary. + +Changes from 2.12.26 to 2.12.27 +------------------------------- + +Completion of Cray 2 port (includes a kludge for floor() and ceil() + that may go or be changed -- I think that it may just be working around + a bug in chem that is being tweaked on the Cray). + +More VMS updates. + +Moved kludge over yacc's insertion of malloc and realloc declarations + from protos.h to the Makefile. + +Added a lisp interpreter in awk to the test suite. (Invoked under + bigtest.) + +Cleanup in r_force_number() -- I had never gotten around to a thorough + profile of the cache code and it turns out to be not worth it. + +Performance boost -- do lazy force_number()'ing for fields etc. i.e. + flag them (MAYBE_NUM) and call force_number only as necessary. + +Changes from 2.12.25 to 2.12.26 +------------------------------- + +Rework of regexp stuff so that dynamic regexps have reasonable + performance -- string used for compiled regexp is stored and + compared to new string -- if same, no recompilation is necessary. + Also, very dynamic regexps cause dfa-based searching to be turned + off. + +Code in dev_open() is back to returning fileno(std*) rather than + dup()ing it. This will be documented. Sorry for the run-around + on this. + +Minor atari updates. + +Minor vms update. + +Missing file from MSDOS port. + +Added warning (under lint) if third arg. of [g]sub is a constant and + handle it properly in the code (i.e. return how many matches). + +Changes from 2.12.24 to 2.12.25 +------------------------------- + +MSDOS port. + +Non-consequential changes to regexp variables in preparation for + a more serious change to fix a serious performance problem. + +Changes from 2.12.23 to 2.12.24 +------------------------------- + +Fixed bug in output flushing introduced a few patches back. This caused + serious performance losses. 
+ +Changes from 2.12.22 to 2.12.23 +------------------------------- + +Accidentally left config/cray2-60 out of last patch. + +Added some missing dependencies to Makefile. + +Cleaned up mkconf a bit; made yacc the default parser (no alloca needed, + right?); added rs6000 hook for signed characters. + +Made regex.c with NO_ALLOCA undefined work. + +Fixed bug in dfa.c for systems where free(NULL) bombs. + +Deleted a few cant_happen()'s that *really* can't happen. + +Changes from 2.12.21 to 2.12.22 +------------------------------- + +Added to config stuff the ability to choose YACC rather than bison. + +Fixed CHAR_UNSIGNED in config.h-dist. + +Second arg. of strtod() is char ** rather than const char **. + +stackb is now initially malloc()'ed since it may be realloc()'ed. + +VMS updates. + +Added SIZE_T_MISSING to config stuff and a default typedef to awk.h. + (Maybe it is not needed on any current systems??) + +re_compile_pattern()'s size is now size_t unconditionally. + +Changes from 2.12.20 to 2.12.21 +------------------------------- + +Corrected missing/gcvt.c. + +Got rid of use of dup2() and thus DUP_MISSING. + +Updated config/sgi33. + +Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in + POSIX 1003.2 for relational comparisons. + +Small updates to test suite. + +Changes from 2.12.19 to 2.12.20 +------------------------------- + +Sloppy, sloppy, sloppy!! I didn't even try to compile the last two + patches. This one fixes goofs in regex.c. + +Changes from 2.12.18 to 2.12.19 +------------------------------- + +Cleanup of last patch. + +Changes from 2.12.17 to 2.12.18 +------------------------------- + +Makefile renamed to Makefile-dist. + +Added alloca() configuration to mkconf. (A bit kludgey.) Just + add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C + to the appropriate config file to have Makefile-dist edited + accordingly. + +Reorganized output flushing to correspond with new semantics of + devopen() on "/dev/std*" etc.
+ +Fixed rest of last goof!! + +Save and restore errno in do_pathopen(). + +Miscellaneous atari updates. + +Get rid of the trailing comma in the NODETYPE definition (Cray + compiler won't take it). + +Try to make the use of `const' consistent since Cray compiler is + fussy about that. See the changes to `basename' and `myname'. + +It turns out that, according to section 3.8.3 (Macro Replacement) + of the ANSI Standard: ``If there are sequences of preprocessing + tokens within the list of arguments that would otherwise act as + preprocessing directives, the behavior is undefined.'' That means + that you cannot count on the behavior of the declaration of + re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it. + +Replaced alloca with malloc/realloc/free in regex.c. It was much simpler + than expected. (Inside NO_ALLOCA for now -- by default no alloca.) + +Added a configuration file, config/cray60, for Unicos-6.0. + +Changes from 2.12.16 to 2.12.17 +------------------------------- + +Ooops. Goofed signal use in last patch. + +Changes from 2.12.15 to 2.12.16 +------------------------------- + +RENAMED *_dir to just * (e.g. missing_dir). + +Numerous VMS changes. + +Proper inclusion of atari and vms files. + +Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE + -- please comment on these! + +Moved pathopen() to io.c (sigh). + +Put local directory ahead in default AWKPATH. + +Added facility in mkconf to echo comments on stdout: lines beginning + with "#echo " will have the remainder of the line echoed when mkconf is run. + Any lines starting with "#" will otherwise be treated as comments. The + intent is to be able to say: + "#echo Make sure you uncomment alloca.c in the Makefile" + or the like. + +Prototype fix for V.4 + +Fixed version_string to not print leading @(#). + +Fixed FIELDWIDTHS to work with strict (turned out to be easy). + +Fixed conf for V.2. + +Changed semantics of /dev/fd/n to be like on real /dev/fd. 
+ +Several configuration and updates in the makefile. + +Updated manpage. + +Include tzset.c and system.c from missing_dir that were accidentally left out of + the last patch. + +Fixed bug in cmdline variable assignment -- arg was getting freed(!) in + call to variable. + +Backed out of parse-time constant folding for now, until I can figure out + how to do it right. + +Fixed devopen() so that getline <"-" works. + +Changes from 2.12.14 to 2.12.15 +------------------------------- + +Changed config/* to a condensed form that can be used with mkconf to generate + a config.h from config.h-dist -- much easier to maintain. Please check + carefully against what you had before for a particular system and report + any problems. vms.h remains separate since the stuff at the bottom + didn't quite fit the mkconf model -- hopefully cleared up later. + +Fixed bug in grammar -- didn't allow function definition to be separated from + other rules by a semi-colon. + +VMS fix to #includes in missing.c -- should we just be including awk.h? + +Updated README for texinfo.tex version. + +Updating of copyright in all .[chy] files. + +Added but commented out Michal's fix to strftime. + +Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to + config.h-dist. + +Added strftime.3 man page for missing_dir + +More posix: func, **, **= don't work in -W posix + +More lint: ^, ^= not in old awk + +gawk.1: removed ref to -DNO_DEV_FD, other minor updating. + +Style change: pushbak becomes pushback() in yylex(). + +Changes from 2.12.13 to 2.12.14 +------------------------------- + +Better (?) organization of awk.h -- attempt to keep all system dependencies + near the top and move some of the non-general things out of the config.h + files. + +Change to handling of SYSTEM_MISSING. + +Small change to ultrix config. + +Do "/dev/fd/*" etc. checking at runtime. + +First pass at VMS port. + +Improvements to error handling (when lexeme spans buffers). 
+ +Fixed backslash handling -- why didn't I notice this sooner? + +Added programs from book to test suite and new target "bigtest" to Makefile. + +Changes from 2.12.12 to 2.12.13 +------------------------------- + +Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit. + Took advantage of opportunity to tune do_print*() for about 10% win on a + print with 5 args (i.e. small but significant). + +Somewhat pervasive changes to reconcile CONVFMT vs. OFMT. + +Better initialization of builtin vars. + +Make config/* consistent wrt STRTOL_MISSING. + +Small portability improvement to alloca.s + +Improvements to lint code in awk.y + +Replaced strtol() with a better one by Chris Torek. + +Changes from 2.12.11 to 2.12.12 +------------------------------- + +Added PORTS file to record successful ports. + +Added #define const to nothing if not STDC and added const to strtod() header. + +Added * to printf capabilities and partially implemented ' ' and '+' (has an + effect for %d only, silently ignored for other formats). I'm afraid that's + as far as I want to go before I look at a complete replacement for + do_sprintf(). + +Added warning for /regexp/ on LHS of MATCHOP. + +Changes from 2.12.10 to 2.12.11 +------------------------------- + +Small Makefile improvements. + +Some remaining nits from the NeXT port. + +Got rid of bcopy() define in awk.h -- not needed anymore (??) + +Changed private in builtin.c -- it is special on Sequent. + +Added subset implementation of strtol() and STRTOL_MISSING. + +A little bit of cleanup in debug.c, dfa.c. + +Changes from 2.12.9 to 2.12.10 +------------------------------ + +Redid compatibility checking and checking for # of args. + +Removed all references to variables[] from outside awk.y, in preparation + for a more abstract interface to the symbol table. + +Got rid of a remaining use of bcopy() in regex.c. 
+ +Changes from 2.12.8 to 2.12.9 +----------------------------- + +Portability improvements for atari, next and decstation. + +Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly. + +Manpage updates. + +Moved support from src release to doc release. + +Updated FUTURES file. + +Added some "lint" warnings. + +Changes from 2.12.7 to 2.12.8 +----------------------------- + +Changed time() to systime(). + +Changed warning() in snode() to fatal(). + +strftime() now defaults second arg. to current time. + +Changes from 2.12.6 to 2.12.7 +----------------------------- + +Fixed bug in sub_common() involving inadequate allocation of a buffer. + +Added some missing files to the Makefile. + +Changes from 2.12.5 to 2.12.6 +----------------------------- + +Fixed bug wherein non-redirected getline could call iop_close() just + prior to a call from do_input(). + +Fixed bug in handling of /dev/stdout and /dev/stderr. + +Changes from 2.12.4 to 2.12.5 +----------------------------- + +Updated README and support directory. + +Changes from 2.12.3 to 2.12.4 +----------------------------- + +Updated CHANGES and TODO (should have been done in previous 2 patches). + +Changes from 2.12.2 to 2.12.3 +----------------------------- + +Brought regex.c and alloca.s into line with current FSF versions. + +Changes from 2.12.1 to 2.12.2 +----------------------------- + +Portability improvements; mostly moving system prototypes out of awk.h + +Introduction of strftime. + +Use of CONVFMT. + +Changes from 2.12 to 2.12.1 +----------------------------- + +Consolidated treatment of command-line assignments (thus correcting the +-v treatment). + +Rationalized builtin-variable handling into a table-driven process, thus +simplifying variable() and eliminating spc_var(). + +Fixed bug in handling of command-line source that ended in a newline. + +Simplified install() and lookup(). 
+ +Did away with double-mallocing of identifiers and now free second and later +instances of a name, after the first gets installed into the symbol table. + +Treat IGNORECASE specially, simplifying a lot of code, and allowing +checking against strict conformance only on setting it, rather than on each +pattern match. + +Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was +added). + +Fixed bug where $0 was not being marked as valid, even after it was rebuilt. +This caused mangling of $0. + + +Changes from 2.11.1 to 2.12 +----------------------------- + +Makefile: + +Portability improvements in Makefile. +Move configuration stuff into config.h + +FSF files: + +Synchronized alloca.[cs] and regex.[ch] with FSF. + +array.c: + +Rationalized hash routines into one with a different algorithm. +delete() now works if the array is a local variable. +Changed interface of assoc_next() and avoided dereferencing past the end of the + array. + +awk.h: + +Merged non-prototype and prototype declarations in awk.h. +Expanded tree_eval #define to short-circuit more calls of r_tree_eval(). + +awk.y: + +Delinted some of the code in the grammar. +Fixed and improved some of the error message printing. +Changed to accommodate unlimited length source lines. +Line continuation now works as advertised. +Source lines can be arbitrarily long. +Refined grammar hacks so that /= assignment works. Regular expressions + starting with /= are recognized at the beginning of a line, after && or || + and after ~ or !~. More contexts can be added if necessary. +Fixed IGNORECASE (multiple scans for backslash). +Condensed expression_lists in array references. +Detect and warn for correct # args in builtin functions -- call most of them + with a fixed number (i.e. fill in defaults at parse-time rather than at + run-time). +Load ENVIRON only if it is referenced (detected at parse-time). +Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time. 
+Fold constant expressions at parse time. +Do make_regexp() on third arg. of split() at parse time if it is a constant. + +builtin.c: + +srand() returns 0 the first time called. +Replaced alloca() with malloc() in do_sprintf(). +Fixed setting of RSTART and RLENGTH in do_match(). +Got rid of get_{one,two,three} and allowance for variable # of args. at + run-time -- this is now done at parse-time. +Fixed latent bug in [g]sub whereby changes to $0 would never get made. +Rewrote much of sub_common() for simplicity and performance. +Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns + a time string like the C function, given the number of seconds since the epoch + and time() returns the current time in seconds. +do_sprintf() now checks for mismatch between format string and number of + arguments supplied. + +dfa.c + +This is borrowed (almost unmodified) from GNU grep to provide faster searches. + +eval.c + +Node_var, Node_var_array and Node_param_list handled from macro rather + than in r_tree_eval(). +Changed cmp_nodes() to not do a force_number() -- this, combined with a + force_number() on ARGV[] and ENVIRON[] brings it into line with other awks +Greatly simplified cmp_nodes(). +Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs(). +All adjacent string concatenations now done at once. + +field.c + +Added support for FIELDWIDTHS. +Fixed bug in get_field() whereby changes to a field were not always + properly reflected in $0. +Reordered tests in parse_field() so that reference off the end of the buffer + doesn't happen. +set_FS() now sets *parse_field i.e. routine to call depending on type of FS. +It also does make_regexp() for FS if needed. get_field() passes FS_regexp + to re_parse_field(), as does do_split(). +Changes to set_field() and set_record() to avoid malloc'ing and free'ing the + field nodes repeatedly. The fields now just point into $0 unless they are + assigned to another variable or changed. 
force_number() on the field is + *only* done when the field is needed. + +gawk.1 + +Fixed troff formatting problem on .TP lines. + +io.c + +Moved some code out into iop.c. +Output from pipes and system() calls is properly synchronized. +Status from pipe close properly returned. +Bug in getline with no redirect fixed. + +iop.c + +This file contains a totally revamped get_a_record and associated code. + +main.c + +Command line programs no longer use a temporary file. +Therefore, tmpnam() no longer required. +Deprecated -a and -e options -- they will go away in the next release, + but for now they cause a warning. +Moved -C, -V, -c options to -W ala posix. +Added -W posix option: throw out \x +Added -W lint option. + + +node.c + +force_number() now allows pure numerics to have leading whitespace. +Added make_string facility to optimize case of adding an already malloc'd + string. +Cleaned up and simplified do_deref(). +Fixed bug in handling of stref==255 in do_deref(). + +re.c + +contains the interface to regexp code + +Changes from 2.11.1 to FSF version of same +------------------------------------------ +Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert) + + * Makefile (YACC): Add -y to bison part. + + * missing.c: Add #include <stdio.h>. + +Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * * Makefile: Add (commented out) default defines for Sony News. + + * awk.h: Move declaration of vprintf so it will compile when + -DVPRINTF_MISSING is defined. + +Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu) + + * gawk.texinfo: changed @-commands that are not part of the + standard, currently released texinfmt.el to those that are. + Otherwise, only people with the as-yet unreleased makeinfo.c can + format this file. + +Changes from 2.11beta to 2.11.1 (production) +-------------------------------------------- + +Went from "beta" to production status!!! 
+ +Now flushes stdout before closing pipes or redirected files to +synchronize output. + +MS-DOS changes added in. + +Signal handler return type parameterized in Makefile and awk.h and +some lint removed. debug.c cleaned up. + +Fixed FS splitting to never match null strings, per book. + +Correction to the manual's description of FS. + +Some compilers break on char *foo = "string" + 4 so fixed version.sh and +main.c. + +Changes from 2.10beta to 2.11beta +--------------------------------- + +This release fixes all reported bugs that we could reproduce. Probably +some of the changes are not documented here. + +The next release will probably not be a beta release! + +The most important change is the addition of the -nostalgia option. :-) + +The documentation has been improved and brought up-to-date. + +There has been a lot of general cleaning up of the code that is not otherwise +documented here. There has been a movement toward using standard-conforming +library routines and providing them (in missing.d) for systems lacking them. +Improved (hopefully) configuration through Makefile modifications and missing.c. +In particular, straightened out confusion over vprintf #defines, declarations +etc. + +Deleted RCS log comments from source, to reduce source size by about one third. +Most of them were horribly out-of-date, anyway. + +Renamed source files to reflect (for the most part) their contents. + +More and improved error messages. Cleanup and fixes to yyerror(). +String constants are not altered in input buffer, so error messages come out +better. Fixed usage message. Make use of ANSI C strerror() function +(provided). + +Plugged many more memory leaks. The memory consumption is now quite +reasonable over a wide range of programs. + +Uses volatile declaration if STDC > 0 to avoid problems due to longjmp. + +New -a and -e options to use awk or egrep style regexps, respectively, +since POSIX says awk should use egrep regexps. Default is -a. 
+ +Added -v option for setting variables before the first file is encountered. +Version information now uses -V and copyleft uses -C. + +Added a patchlevel.h file and its use for -V and -C. + +Append_right() optimized for major improvement to programs with a *lot* +of statements. + +Operator precedence has been corrected to match draft Posix. + +Tightened up grammar for builtin functions so that only length +may be called without arguments or parentheses. + +/regex/ is now a normal expression that can appear in any expression +context. + +Allow /= to begin a regexp. Allow ..[../..].. in a regexp. + +Allow empty compound statements ({}). + +Made return and next illegal outside a function and in BEGIN/END respectively. + +Division by zero is now illegal and causes a fatal error. + +Fixed exponentiation so that x ^ 0 and x ^= 0 both return 1. + +Fixed do_sqrt, do_log, and do_exp to do argument/return checking and +print an error message, per the manual. + +Fixed main to catch SIGSEGV to get source and data file line numbers. + +Fixed yyerror to print the ^ at the beginning of the bad token, not the end. + +Fix to substr() builtin: it was failing if the arguments +weren't already strings. + +Added new node value flag NUMERIC to indicate that a variable is +purely a number as opposed to type NUM which indicates that +the node's numeric value is valid. This is set in make_number(), +tmp_number and r_force_number() when appropriate and used in +cmp_nodes(). This fixed a bug in comparison of variables that had +numeric prefixes. The new code uses strtod() and eliminates is_a_number(). +A simple strtod() is provided for systems lacking one. It does no +overflow checking, so could be improved. + +Simplification and efficiency improvement in force_string. + +Added performance tweak in r_force_number(). + +Fixed a bug with nested loops and break/continue in functions. + +Fixed inconsistency in handling of empty fields when $0 has to be rebuilt. 
+Happens to simplify rebuild_record(). + +Cleaned up the code associated with opening a pipe for reading. Gawk +now has its own popen routine (gawk_popen) that allocates an IOBUF +and keeps track of the pid of the child process. gawk_pclose +marks the appropriate child as defunct in the right struct redirect. + +Cleaned up and fixed close_redir(). + +Fixed an obscure bug to do with redirection. Intermingled ">" and ">>" +redirects did not output in a predictable order. + +Improved handling of output buffering: now all print[f]s redirected to a tty +or pipe are flushed immediately and non-redirected output to a tty is flushed +before the next input record is read. + +Fixed a bug in get_a_record() where bcopy() could have copied over +a random pointer. + +Fixed a bug when RS="" and records separated by multiple blank lines. + +Got rid of SLOWIO code which was out-of-date anyway. + +Fix in get_field() for case where $0 is changed and then $(n) are +changed and then $0 is used. + +Fixed infinite loop on failure to open file for reading from getline. +Now handles redirect file open failures properly. + +Filenames such as /dev/stdin now allowed on the command line as well as +in redirects. + +Fixed so that gawk '$1' where $1 is a zero tests false. + +Fixed parsing so that `RLENGTH -1' parses the same as `RLENGTH - 1', +for example. + +The return from a user-defined function now defaults to the Null node. +This fixes a core-dump-causing bug when the return value of a function +is used and that function returns no value. + +Now catches floating point exceptions to avoid core dumps. + +Bug fix for deleting elements of an array -- under some conditions, it was +deleting more than one element at a time. + +Fix in AWKPATH code for running off the end of the string. + +Fixed handling of precision in *printf calls. %0.2d now works properly, +as does %c. [s]printf now recognizes %i and %X. + +Fixed a bug in printing of very large (>240) strings. 
+ +Cleaned up erroneous behaviour for RS == "". + +Added IGNORECASE support to index(). + +Simplified and fixed newnode/freenode. + +Fixed reference to $(anything) in a BEGIN block. + +Eliminated use of USG rand48(). + +Bug fix in force_string for machines with 16-bit ints. + +Replaced use of mktemp() with tmpnam() and provided a partial implementation of +the latter for systems that don't have it. + +Added a portability check for includes in io.c. + +Minor portability fix in alloc.c plus addition of xmalloc(). + +Portability fix: on UMAX4.2, st_blksize is zero for a pipe, thus breaking +iop_alloc() -- fixed. + +Workaround for compiler bug on Sun386i in do_sprintf. + +More and improved prototypes in awk.h. + +Consolidated C escape parsing code into one place. + +strict flag is now turned on only when invoked with compatibility option. +It now applies to fewer things. + +Changed cast of f._ptr in vprintf.c from (unsigned char *) to (char *). +Hopefully this is right for the systems that use this code (I don't). + +Support for pipes under MSDOS added. diff --git a/gnu/usr.bin/awk/PORTS b/gnu/usr.bin/awk/PORTS new file mode 100644 index 0000000..95e133f --- /dev/null +++ b/gnu/usr.bin/awk/PORTS @@ -0,0 +1,32 @@ +A recent version of gawk has been successfully compiled and run "make test" +on the following: + +Sun 4/490 running 4.1 +NeXT running 2.0 +DECstation 3100 running Ultrix 4.0 or Ultrix 3.1 (different config) +AtariST (16-bit ints, gcc compiler, byacc, running under TOS) +ESIX V.3.2 Rev D (== System V Release 3.2), the 386. 
compiler was gcc + bison +IBM RS/6000 (see README.rs6000) +486 running SVR4, using cc and bison +SGI running IRIX 3.3 using gcc (fails with cc) +Sequent Balance running Dynix V3.1 +Cray Y-MP8 running Unicos 6.0.11 +Cray 2 running Unicos 6.1 (modulo trailing zeroes in chem) +VAX/VMS V5.x (should also work on 4.6 and 4.7) +VMS POSIX V1.0, V1.1 +OpenVMS AXP V1.0 +MSDOS - Microsoft C 5.1, compiles and runs very simple testing +BSD 4.4alpha + +From: ghazi@caip.rutgers.edu (Kaveh R. Ghazi): + +arch configured as: +---- -------------- +Hpux 9.0 hpux8x +NeXTStep 2.0 next20 +Sgi Irix 4.0.5 (/bin/cc) sgi405.cc (new file) +Stardent Titan 1500 OSv2.5 sysv3 +Stardent Vistra (i860) SVR4 sysv4 +SunOS 4.1.2 sunos41 +Tektronix XD88 (UTekV 3.2e) sysv3 +Ultrix 4.2 ultrix41 diff --git a/gnu/usr.bin/awk/POSIX b/gnu/usr.bin/awk/POSIX new file mode 100644 index 0000000..f240542 --- /dev/null +++ b/gnu/usr.bin/awk/POSIX @@ -0,0 +1,95 @@ +Right now, the numeric vs. string comparisons are screwed up in draft +11.2. What prompted me to check it out was the note in gnu.bug.utils +which observed that gawk was doing the comparison $1 == "000" +numerically. I think that we can agree that intuitively, this should +be done as a string comparison. Version 2.13.2 of gawk follows the +current POSIX draft. Following is how I (now) think this +stuff should be done. + +1. A numeric literal or the result of a numeric operation has the NUMERIC + attribute. + +2. A string literal or the result of a string operation has the STRING + attribute. + +3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the + elements of an array created by split() that are numeric strings + have the STRNUM attribute. Otherwise, they have the STRING attribute. + Uninitialized variables also have the STRNUM attribute. + +4. Attributes propagate across assignments, but are not changed by + any use. 
(Although a use may cause the entity to acquire an additional + value such that it has both a numeric and string value -- this leaves the + attribute unchanged.) + +When two operands are compared, either string comparison or numeric comparison +may be used, depending on the attributes of the operands, according to the +following (symmetric) matrix: + + +---------------------------------------------- + | STRING NUMERIC STRNUM +--------+---------------------------------------------- + | +STRING | string string string + | +NUMERIC | string numeric numeric + | +STRNUM | string numeric numeric +--------+---------------------------------------------- + +So, the following program should print all OKs. + +echo '0e2 0a 0 0b +0e2 0a 0 0b' | +$AWK ' +NR == 1 { + num = 0 + str = "0e2" + + print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" ) + print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" ) + print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" ) + print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" ) + + print ++test ": " ( (0 == "0") ? "OK" : "OOPS" ) + print ++test ": " ( (0 == num) ? "OK" : "OOPS" ) + print ++test ": " ( (0 != $2) ? "OK" : "OOPS" ) + print ++test ": " ( (0 == $1) ? "OK" : "OOPS" ) + + print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" ) + print ++test ": " ( ($1 == num) ? "OK" : "OOPS" ) + print ++test ": " ( ($2 != 0) ? "OK" : "OOPS" ) + print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" ) + print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" ) + print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" ) + print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15 +} +{ + a = "+2" + b = 2 + if (NR % 2) + c = a + b + print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22 + + d = "2a" + b = 2 + if (NR % 2) + c = d + b + print ++test ": " ( (d != b) ? "OK" : "OOPS" ) + + print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" ) + + e = "2" + print ++test ": " ( (e == b "") ? "OK" : "OOPS" ) + + a = "2.13" + print ++test ": " ( (a == 2.13) ? 
"OK" : "OOPS" ) + + a = "2.130000" + print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" ) + + if (NR == 2) { + CONVFMT = "%.6f" + print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) + } +}' diff --git a/gnu/usr.bin/awk/PROBLEMS b/gnu/usr.bin/awk/PROBLEMS new file mode 100644 index 0000000..3b7c514 --- /dev/null +++ b/gnu/usr.bin/awk/PROBLEMS @@ -0,0 +1,6 @@ +This is a list of known problems in gawk 2.15. +Hopefully they will all be fixed in the next major release of gawk. + +Please keep in mind that the code is still undergoing significant evolution. + +1. Gawk's printf is probably still not POSIX compliant. diff --git a/gnu/usr.bin/awk/README b/gnu/usr.bin/awk/README new file mode 100644 index 0000000..f4bd3df --- /dev/null +++ b/gnu/usr.bin/awk/README @@ -0,0 +1,116 @@ +README: + +This is GNU Awk 2.15. It should be upwardly compatible with the +System V Release 4 awk. It is almost completely compliant with draft 11.3 +of POSIX 1003.2. + +This release adds new features -- see NEWS for details. + +See the installation instructions, below. + +Known problems are given in the PROBLEMS file. Work to be done is +described briefly in the FUTURES file. Verified ports are listed in +the PORTS file. Changes in this version are summarized in the CHANGES file. +Please read the LIMITATIONS and ACKNOWLEDGMENT files. + +Read the file POSIX for a discussion of how the standard says comparisons +should be done vs. how they really should be done and how gawk does them. + +To format the documentation with TeX, you must use texinfo.tex 2.53 +or later. Otherwise footnotes look unacceptable. + +If you wish to remake the Info files, you should use makeinfo. The 2.15 +version of makeinfo works with no errors. + +The man page is up to date. + +INSTALLATION: + +Check whether there is a system-specific README file for your system. + +Makefile.in may need some tailoring. The only changes necessary should +be to change installation targets or to change compiler flags. 
+The changes to make in Makefile.in are commented and should be obvious. + +All other changes should be made in a config file. Samples for +various systems are included in the config directory. Starting with +2.11, our intent has been to make the code conform to standards (ANSI, +POSIX, SVID, in that order) whenever possible, and to not penalize +standard conforming systems. We have included substitute versions of +routines not universally available. Simply add the appropriate define +for the missing feature(s) on your system. + +If you have neither bison nor yacc, use the awktab.c file here. It was +generated with bison, and should have no AT&T code in it. (Note that +modifying awk.y without bison or yacc will be difficult, at best. You might +want to get a copy of bison from the FSF too.) + +If no config file is included for your system, start by copying one +for a similar system. One way of determining the defines needed is to +try to load gawk with nothing defined and see what routines are +unresolved by the loader. This should give you a good idea of how to +proceed. + +The next release will use the FSF autoconfig program, so we are no longer +soliciting new config files. + +If you have an MS-DOS system, use the stuff in the pc directory. +For an Atari there is an atari directory and similarly one for VMS. + +Chapter 16 of The GAWK Manual discusses configuration in detail. + +After successful compilation, do 'make test' to run a small test +suite. There should be no output from the 'cmp' invocations except in +the cases where there are small differences in floating point values. +If there are other differences, please investigate and report the +problem. + +PRINTING THE MANUAL + +The 'support' directory contains texinfo.tex 2.65, which will be necessary +for printing the manual, and the texindex.c program from the texinfo +distribution which is also necessary. See the makefile for the steps needed +to get a DVI file from the manual. 
+ +CAVEATS + +The existence of a patchlevel.h file does *N*O*T* imply a commitment on +our part to issue bug fixes or patches. It is there in case we should +decide to do so. + +BUG REPORTS AND FIXES (Un*x systems): + +Please coordinate changes through David Trueman and/or Arnold Robbins. + +David Trueman +Department of Mathematics, Statistics and Computing Science, +Dalhousie University, Halifax, Nova Scotia, Canada + +UUCP: {uunet utai watmath}!dalcs!david +INTERNET: david@cs.dal.ca + +Arnold Robbins +1736 Reindeer Drive +Atlanta, GA, 30329, USA + +INTERNET: arnold@skeeve.atl.ga.us +UUCP: { gatech, emory, emoryu1 }!skeeve!arnold + +BUG REPORTS AND FIXES (non-Unix ports): + +MS-DOS: + Scott Deifik + AMGEN Inc. + Amgen Center, Bldg.17-Dept.393 + Thousand Oaks, CA 91320-1789 + Tel-805-499-5725 ext.4677 + Fax-805-498-0358 + scottd@amgen.com + +VMS: + Pat Rankin + rankin@eql.caltech.edu (e-mail only) + +Atari ST: + Michal Jaegermann + NTOMCZAK@vm.ucs.UAlberta.CA (e-mail only) diff --git a/gnu/usr.bin/awk/array.c b/gnu/usr.bin/awk/array.c new file mode 100644 index 0000000..59be340 --- /dev/null +++ b/gnu/usr.bin/awk/array.c @@ -0,0 +1,293 @@ +/* + * array.c - routines for associative arrays. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); +/* + * concat_exp - build the string form of an array subscript. + * + * If tree is not a Node_expression_list, or the list has only one element, + * the result is that expression evaluated and forced to a string. + * Otherwise every element is evaluated, forced to a string, and the pieces + * are joined with the current string value of SUBSEP into a newly + * allocated string node that is flagged TEMP. + */ +NODE * +concat_exp(tree) +register NODE *tree; +{ + register NODE *r; + char *str; + char *s; + unsigned len; + int offset; + int subseplen; + char *subsep; + + if (tree->type != Node_expression_list) + return force_string(tree_eval(tree)); + r = force_string(tree_eval(tree->lnode)); + if (tree->rnode == NULL) + return r; + subseplen = SUBSEP_node->lnode->stlen; + subsep = SUBSEP_node->lnode->stptr; + len = r->stlen + subseplen + 2; + emalloc(str, char *, len, "concat_exp"); + memcpy(str, r->stptr, r->stlen+1); + s = str + r->stlen; + free_temp(r); + tree = tree->rnode; + while (tree) { + if (subseplen == 1) + *s++ = *subsep; + else { + memcpy(s, subsep, subseplen+1); + s += subseplen; + } + r = force_string(tree_eval(tree->lnode)); + len += r->stlen + subseplen; + offset = s - str; /* keep the write position as an offset: str may change in erealloc */ + erealloc(str, char *, len, "concat_exp"); + s = str + offset; + memcpy(s, r->stptr, r->stlen+1); + s += r->stlen; + free_temp(r); + tree = tree->rnode; + } + r = make_str_node(str, s - str, ALREADY_MALLOCED); + r->flags |= TEMP; + return r; +} + +/* + * assoc_clear - remove every element of the array symbol. + * + * Each of the HASHSIZE chains is walked; the name and value of every + * bucket are unref()ed, the bucket is freed, and the chain head is reset + * to 0. Returns at once if no table has been allocated for symbol. + * Used to flush all the values in symbol[] before doing a split(). + */ +void +assoc_clear(symbol) +NODE *symbol; +{ + int i; + NODE *bucket, *next; + + if (symbol->var_array == 0) + return; + for (i = 0; i < HASHSIZE; i++) { + for (bucket = symbol->var_array[i]; bucket; bucket = next) { + next = bucket->ahnext; + unref(bucket->ahname); + unref(bucket->ahvalue); + freenode(bucket); + } + symbol->var_array[i] = 0; + } +} + +/* + * hash - calculate the hash function of the len bytes starting at s. + * + * Every byte is folded into a running value by shifting it left four + * bits and adding the byte. Whenever any bit selected by the mask + * 0xf0000000 becomes set, those bits are XORed back in 24 positions lower + * and then cleared. The value returned is reduced modulo HASHSIZE, so it + * can be used directly as an index into a var_array table. + */ +unsigned int +hash(s, len) +register char *s; +register int len; +{ + register unsigned long h = 0, g; + + while (len--) { + h = (h << 4) + *s++; + g = (h & 
0xf0000000); + if (g) { /* fold the set high bits back in lower down, then clear them */ + h = h ^ (g >> 24); + h = h ^ g; + } + } + if (h < HASHSIZE) + return h; + else + return h%HASHSIZE; +} + +/* + * locate symbol[subs] + * + * Walks the chain symbol->var_array[hash1] looking for a bucket whose name + * compares equal to subs under cmp_nodes(). A match that is not already + * first in its chain is moved to the front before it is returned. + * hash1 is the chain index; callers in this file obtain it from hash(). + */ +static NODE * /* NULL if not found */ +assoc_find(symbol, subs, hash1) +NODE *symbol; +register NODE *subs; +int hash1; +{ + register NODE *bucket, *prev = 0; + + for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) { + if (cmp_nodes(bucket->ahname, subs) == 0) { + if (prev) { /* move found to front of chain */ + prev->ahnext = bucket->ahnext; + bucket->ahnext = symbol->var_array[hash1]; + symbol->var_array[hash1] = bucket; + } + return bucket; + } else + prev = bucket; /* save previous list entry */ + } + return NULL; +} + +/* + * test whether the array element symbol[subs] exists or not + * + * A Node_param_list symbol is first resolved through stack_ptr. subs is + * turned into a string subscript with concat_exp(), looked up with + * assoc_find(), and the temporary is released. Returns 1 if the element + * exists; returns 0 if it does not or if the array has no table yet. + * The element is never created by this test. + */ +int +in_array(symbol, subs) +NODE *symbol, *subs; +{ + register int hash1; + + if (symbol->type == Node_param_list) + symbol = stack_ptr[symbol->param_cnt]; + if (symbol->var_array == 0) + return 0; + subs = concat_exp(subs); /* concat_exp returns a string node */ + hash1 = hash(subs->stptr, subs->stlen); + if (assoc_find(symbol, subs, hash1) == NULL) { + free_temp(subs); + return 0; + } else { + free_temp(subs); + return 1; + } +} + +/* + * SYMBOL is the address of the node (or other pointer) being dereferenced. + * SUBS is a number or string used as the subscript. + * + * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it + * isn't there. 
Returns a pointer ala get_lhs to where its value is stored + */ +NODE ** +assoc_lookup(symbol, subs) +NODE *symbol, *subs; +{ + register int hash1; + register NODE *bucket; + + (void) force_string(subs); + hash1 = hash(subs->stptr, subs->stlen); + + if (symbol->var_array == 0) { /* this table really should grow + * dynamically */ + unsigned size; + + size = sizeof(NODE *) * HASHSIZE; + emalloc(symbol->var_array, NODE **, size, "assoc_lookup"); + memset((char *)symbol->var_array, 0, size); + symbol->type = Node_var_array; + } else { + bucket = assoc_find(symbol, subs, hash1); + if (bucket != NULL) { + free_temp(subs); + return &(bucket->ahvalue); + } + } + + /* It's not there, install it. */ + if (do_lint && subs->stlen == 0) + warning("subscript of array `%s' is null string", + symbol->vname); + getnode(bucket); + bucket->type = Node_ahash; + if (subs->flags & TEMP) + bucket->ahname = dupnode(subs); + else { + unsigned int saveflags = subs->flags; + + subs->flags &= ~MALLOC; + bucket->ahname = dupnode(subs); + subs->flags = saveflags; + } + free_temp(subs); + + /* array subscripts are strings */ + bucket->ahname->flags &= ~NUMBER; + bucket->ahname->flags |= STRING; + bucket->ahvalue = Nnull_string; + bucket->ahnext = symbol->var_array[hash1]; + symbol->var_array[hash1] = bucket; + return &(bucket->ahvalue); +} + +void +do_delete(symbol, tree) +NODE *symbol, *tree; +{ + register int hash1; + register NODE *bucket, *last; + NODE *subs; + + if (symbol->type == Node_param_list) + symbol = stack_ptr[symbol->param_cnt]; + if (symbol->var_array == 0) + return; + subs = concat_exp(tree); /* concat_exp returns string node */ + hash1 = hash(subs->stptr, subs->stlen); + + last = NULL; + for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext) + if (cmp_nodes(bucket->ahname, subs) == 0) + break; + free_temp(subs); + if (bucket == NULL) + return; + if (last) + last->ahnext = bucket->ahnext; + else + symbol->var_array[hash1] = bucket->ahnext; + 
unref(bucket->ahname); + unref(bucket->ahvalue); + freenode(bucket); +} + +void +assoc_scan(symbol, lookat) +NODE *symbol; +struct search *lookat; +{ + if (!symbol->var_array) { + lookat->retval = NULL; + return; + } + lookat->arr_ptr = symbol->var_array; + lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* added */ + lookat->bucket = symbol->var_array[0]; + assoc_next(lookat); +} + +void +assoc_next(lookat) +struct search *lookat; +{ + while (lookat->arr_ptr < lookat->arr_end) { + if (lookat->bucket != 0) { + lookat->retval = lookat->bucket->ahname; + lookat->bucket = lookat->bucket->ahnext; + return; + } + lookat->arr_ptr++; + if (lookat->arr_ptr < lookat->arr_end) + lookat->bucket = *(lookat->arr_ptr); + else + lookat->retval = NULL; + } + return; +} diff --git a/gnu/usr.bin/awk/awk.1 b/gnu/usr.bin/awk/awk.1 new file mode 100644 index 0000000..0338485 --- /dev/null +++ b/gnu/usr.bin/awk/awk.1 @@ -0,0 +1,1873 @@ +.ds PX \s-1POSIX\s+1 +.ds UX \s-1UNIX\s+1 +.ds AN \s-1ANSI\s+1 +.TH GAWK 1 "Apr 15 1993" "Free Software Foundation" "Utility Commands" +.SH NAME +gawk \- pattern scanning and processing language +.SH SYNOPSIS +.B gawk +[ POSIX or GNU style options ] +.B \-f +.I program-file +[ +.B \-\^\- +] file .\^.\^. +.br +.B gawk +[ POSIX or GNU style options ] +[ +.B \-\^\- +] +.I program-text +file .\^.\^. +.SH DESCRIPTION +.I Gawk +is the GNU Project's implementation of the AWK programming language. +It conforms to the definition of the language in +the \*(PX 1003.2 Command Language And Utilities Standard. +This version in turn is based on the description in +.IR "The AWK Programming Language" , +by Aho, Kernighan, and Weinberger, +with the additional features defined in the System V Release 4 version +of \*(UX +.IR awk . +.I Gawk +also provides some GNU-specific extensions. 
+.PP +The command line consists of options to +.I gawk +itself, the AWK program text (if not supplied via the +.B \-f +or +.B \-\^\-file +options), and values to be made +available in the +.B ARGC +and +.B ARGV +pre-defined AWK variables. +.SH OPTIONS +.PP +.I Gawk +options may be either the traditional \*(PX one letter options, +or the GNU style long options. \*(PX style options start with a single ``\-'', +while GNU long options start with ``\-\^\-''. +GNU style long options are provided for both GNU-specific features and +for \*(PX mandated features. Other implementations of the AWK language +are likely to only accept the traditional one letter options. +.PP +Following the \*(PX standard, +.IR gawk -specific +options are supplied via arguments to the +.B \-W +option. Multiple +.B \-W +options may be supplied, or multiple arguments may be supplied together +if they are separated by commas, or enclosed in quotes and separated +by white space. +Case is ignored in arguments to the +.B \-W +option. +Each +.B \-W +option has a corresponding GNU style long option, as detailed below. +.PP +.I Gawk +accepts the following options. +.TP +.PD 0 +.BI \-F " fs" +.TP +.PD +.BI \-\^\-field-separator= fs +Use +.I fs +for the input field separator (the value of the +.B FS +predefined +variable). +.TP +.PD 0 +\fB\-v\fI var\fB\^=\^\fIval\fR +.TP +.PD +\fB\-\^\-assign=\fIvar\fB\^=\^\fIval\fR +Assign the value +.IR val , +to the variable +.IR var , +before execution of the program begins. +Such variable values are available to the +.B BEGIN +block of an AWK program. +.TP +.PD 0 +.BI \-f " program-file" +.TP +.PD +.BI \-\^\-file= program-file +Read the AWK program source from the file +.IR program-file , +instead of from the first command line argument. +Multiple +.B \-f +(or +.BR \-\^\-file ) +options may be used. +.TP \w'\fB\-\^\-copyright\fR'u+1n +.PD 0 +.B "\-W compat" +.TP +.PD +.B \-\^\-compat +Run in +.I compatibility +mode. 
In compatibility mode, +.I gawk +behaves identically to \*(UX +.IR awk ; +none of the GNU-specific extensions are recognized. +See +.BR "GNU EXTENSIONS" , +below, for more information. +.TP +.PD 0 +.B "\-W copyleft" +.TP +.PD 0 +.B "\-W copyright" +.TP +.PD 0 +.B \-\^\-copyleft +.TP +.PD +.B \-\^\-copyright +Print the short version of the GNU copyright information message on +the error output. +.TP +.PD 0 +.B "\-W help" +.TP +.PD 0 +.B "\-W usage" +.TP +.PD 0 +.B \-\^\-help +.TP +.PD +.B \-\^\-usage +Print a relatively short summary of the available options on +the error output. +.TP +.PD 0 +.B "\-W lint" +.TP +.PD +.B \-\^\-lint +Provide warnings about constructs that are +dubious or non-portable to other AWK implementations. +.ig +.\" This option is left undocumented, on purpose. +.TP +.PD 0 +.B "\-W nostalgia" +.TP +.PD +.B \-\^\-nostalgia +Provide a moment of nostalgia for long time +.I awk +users. +.. +.TP +.PD 0 +.B "\-W posix" +.TP +.PD +.B \-\^\-posix +This turns on +.I compatibility +mode, with the following additional restrictions: +.RS +.TP \w'\(bu'u+1n +\(bu +.B \ex +escape sequences are not recognized. +.TP +\(bu +The synonym +.B func +for the keyword +.B function +is not recognized. +.TP +\(bu +The operators +.B ** +and +.B **= +cannot be used in place of +.B ^ +and +.BR ^= . +.RE +.TP +.PD 0 +.BI "\-W source=" program-text +.TP +.PD +.BI \-\^\-source= program-text +Use +.I program-text +as AWK program source code. +This option allows the easy intermixing of library functions (used via the +.B \-f +and +.B \-\^\-file +options) with source code entered on the command line. +It is intended primarily for medium to large size AWK programs used +in shell scripts. +.sp .5 +The +.B "\-W source=" +form of this option uses the rest of the command line argument for +.IR program-text ; +no other options to +.B \-W +will be recognized in the same argument. 
+.TP +.PD 0 +.B "\-W version" +.TP +.PD +.B \-\^\-version +Print version information for this particular copy of +.I gawk +on the error output. +This is useful mainly for knowing if the current copy of +.I gawk +on your system +is up to date with respect to whatever the Free Software Foundation +is distributing. +.TP +.B \-\^\- +Signal the end of options. This is useful to allow further arguments to the +AWK program itself to start with a ``\-''. +This is mainly for consistency with the argument parsing convention used +by most other \*(PX programs. +.PP +Any other options are flagged as illegal, but are otherwise ignored. +.SH AWK PROGRAM EXECUTION +.PP +An AWK program consists of a sequence of pattern-action statements +and optional function definitions. +.RS +.PP +\fIpattern\fB { \fIaction statements\fB }\fR +.br +\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR +.RE +.PP +.I Gawk +first reads the program source from the +.IR program-file (s) +if specified, or from the first non-option argument on the command line. +The +.B \-f +option may be used multiple times on the command line. +.I Gawk +will read the program text as if all the +.IR program-file s +had been concatenated together. This is useful for building libraries +of AWK functions, without having to include them in each new AWK +program that uses them. To use a library function in a file from a +program typed in on the command line, specify +.B /dev/tty +as one of the +.IR program-file s, +type your program, and end it with a +.B ^D +(control-d). +.PP +The environment variable +.B AWKPATH +specifies a search path to use when finding source files named with +the +.B \-f +option. If this variable does not exist, the default path is +\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR. +If a file name given to the +.B \-f +option contains a ``/'' character, no path search is performed. +.PP +.I Gawk +executes AWK programs in the following order. 
+First, +.I gawk +compiles the program into an internal form. +Next, all variable assignments specified via the +.B \-v +option are performed. Then, +.I gawk +executes the code in the +.B BEGIN +block(s) (if any), +and then proceeds to read +each file named in the +.B ARGV +array. +If there are no files named on the command line, +.I gawk +reads the standard input. +.PP +If a filename on the command line has the form +.IB var = val +it is treated as a variable assignment. The variable +.I var +will be assigned the value +.IR val . +(This happens after any +.B BEGIN +block(s) have been run.) +Command line variable assignment +is most useful for dynamically assigning values to the variables +AWK uses to control how input is broken into fields and records. It +is also useful for controlling state if multiple passes are needed over +a single data file. +.PP +If the value of a particular element of +.B ARGV +is empty (\fB""\fR), +.I gawk +skips over it. +.PP +For each line in the input, +.I gawk +tests to see if it matches any +.I pattern +in the AWK program. +For each pattern that the line matches, the associated +.I action +is executed. +The patterns are tested in the order they occur in the program. +.PP +Finally, after all the input is exhausted, +.I gawk +executes the code in the +.B END +block(s) (if any). +.SH VARIABLES AND FIELDS +AWK variables are dynamic; they come into existence when they are +first used. Their values are either floating-point numbers or strings, +or both, +depending upon how they are used. AWK also has one dimension +arrays; multiply dimensioned arrays may be simulated. +Several pre-defined variables are set as a program +runs; these will be described as needed and summarized below. +.SS Fields +.PP +As each input line is read, +.I gawk +splits the line into +.IR fields , +using the value of the +.B FS +variable as the field separator. +If +.B FS +is a single character, fields are separated by that character. 
+Otherwise, +.B FS +is expected to be a full regular expression. +In the special case that +.B FS +is a single blank, fields are separated +by runs of blanks and/or tabs. +Note that the value of +.B IGNORECASE +(see below) will also affect how fields are split when +.B FS +is a regular expression. +.PP +If the +.B FIELDWIDTHS +variable is set to a space separated list of numbers, each field is +expected to have fixed width, and +.I gawk +will split up the record using the specified widths. The value of +.B FS +is ignored. +Assigning a new value to +.B FS +overrides the use of +.BR FIELDWIDTHS , +and restores the default behavior. +.PP +Each field in the input line may be referenced by its position, +.BR $1 , +.BR $2 , +and so on. +.B $0 +is the whole line. The value of a field may be assigned to as well. +Fields need not be referenced by constants: +.RS +.PP +.ft B +n = 5 +.br +print $n +.ft R +.RE +.PP +prints the fifth field in the input line. +The variable +.B NF +is set to the total number of fields in the input line. +.PP +References to non-existent fields (i.e. fields after +.BR $NF ) +produce the null-string. However, assigning to a non-existent field +(e.g., +.BR "$(NF+2) = 5" ) +will increase the value of +.BR NF , +create any intervening fields with the null string as their value, and +cause the value of +.B $0 +to be recomputed, with the fields being separated by the value of +.BR OFS . +.SS Built-in Variables +.PP +AWK's built-in variables are: +.PP +.TP \w'\fBFIELDWIDTHS\fR'u+1n +.B ARGC +The number of command line arguments (does not include options to +.IR gawk , +or the program source). +.TP +.B ARGIND +The index in +.B ARGV +of the current file being processed. +.TP +.B ARGV +Array of command line arguments. The array is indexed from +0 to +.B ARGC +\- 1. +Dynamically changing the contents of +.B ARGV +can control the files used for data. +.TP +.B CONVFMT +The conversion format for numbers, \fB"%.6g"\fR, by default. 
+.TP +.B ENVIRON +An array containing the values of the current environment. +The array is indexed by the environment variables, each element being +the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be +.BR /u/arnold ). +Changing this array does not affect the environment seen by programs which +.I gawk +spawns via redirection or the +.B system() +function. +(This may change in a future version of +.IR gawk .) +.\" but don't hold your breath... +.TP +.B ERRNO +If a system error occurs either doing a redirection for +.BR getline , +during a read for +.BR getline , +or during a +.BR close , +then +.B ERRNO +will contain +a string describing the error. +.TP +.B FIELDWIDTHS +A white-space separated list of fieldwidths. When set, +.I gawk +parses the input into fields of fixed width, instead of using the +value of the +.B FS +variable as the field separator. +The fixed field width facility is still experimental; expect the +semantics to change as +.I gawk +evolves over time. +.TP +.B FILENAME +The name of the current input file. +If no files are specified on the command line, the value of +.B FILENAME +is ``\-''. +.TP +.B FNR +The input record number in the current input file. +.TP +.B FS +The input field separator, a blank by default. +.TP +.B IGNORECASE +Controls the case-sensitivity of all regular expression operations. If +.B IGNORECASE +has a non-zero value, then pattern matching in rules, +field splitting with +.BR FS , +regular expression +matching with +.B ~ +and +.BR !~ , +and the +.BR gsub() , +.BR index() , +.BR match() , +.BR split() , +and +.B sub() +pre-defined functions will all ignore case when doing regular expression +operations. Thus, if +.B IGNORECASE +is not equal to zero, +.B /aB/ +matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, +and \fB"AB"\fP. +As with all AWK variables, the initial value of +.B IGNORECASE +is zero, so all regular expression operations are normally case-sensitive. 
+.TP +.B NF +The number of fields in the current input record. +.TP +.B NR +The total number of input records seen so far. +.TP +.B OFMT +The output format for numbers, \fB"%.6g"\fR, by default. +.TP +.B OFS +The output field separator, a blank by default. +.TP +.B ORS +The output record separator, by default a newline. +.TP +.B RS +The input record separator, by default a newline. +.B RS +is exceptional in that only the first character of its string +value is used for separating records. +(This will probably change in a future release of +.IR gawk .) +If +.B RS +is set to the null string, then records are separated by +blank lines. +When +.B RS +is set to the null string, then the newline character always acts as +a field separator, in addition to whatever value +.B FS +may have. +.TP +.B RSTART +The index of the first character matched by +.BR match() ; +0 if no match. +.TP +.B RLENGTH +The length of the string matched by +.BR match() ; +\-1 if no match. +.TP +.B SUBSEP +The character used to separate multiple subscripts in array +elements, by default \fB"\e034"\fR. +.SS Arrays +.PP +Arrays are subscripted with an expression between square brackets +.RB ( [ " and " ] ). +If the expression is an expression list +.RI ( expr ", " expr " ...)" +then the array subscript is a string consisting of the +concatenation of the (string) value of each expression, +separated by the value of the +.B SUBSEP +variable. +This facility is used to simulate multiply dimensioned +arrays. For example: +.PP +.RS +.ft B +i = "A" ;\^ j = "B" ;\^ k = "C" +.br +x[i, j, k] = "hello, world\en" +.ft R +.RE +.PP +assigns the string \fB"hello, world\en"\fR to the element of the array +.B x +which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK +are associative, i.e. indexed by string values. +.PP +The special operator +.B in +may be used in an +.B if +or +.B while +statement to see if an array has an index consisting of a particular +value. 
+.PP +.RS +.ft B +.nf +if (val in array) + print array[val] +.fi +.ft +.RE +.PP +If the array has multiple subscripts, use +.BR "(i, j) in array" . +.PP +The +.B in +construct may also be used in a +.B for +loop to iterate over all the elements of an array. +.PP +An element may be deleted from an array using the +.B delete +statement. +.SS Variable Typing And Conversion +.PP +Variables and fields +may be (floating point) numbers, or strings, or both. How the +value of a variable is interpreted depends upon its context. If used in +a numeric expression, it will be treated as a number, if used as a string +it will be treated as a string. +.PP +To force a variable to be treated as a number, add 0 to it; to force it +to be treated as a string, concatenate it with the null string. +.PP +When a string must be converted to a number, the conversion is accomplished +using +.IR atof (3). +A number is converted to a string by using the value of +.B CONVFMT +as a format string for +.IR sprintf (3), +with the numeric value of the variable as the argument. +However, even though all numbers in AWK are floating-point, +integral values are +.I always +converted as integers. Thus, given +.PP +.RS +.ft B +.nf +CONVFMT = "%2.2f" +a = 12 +b = a "" +.fi +.ft R +.RE +.PP +the variable +.B b +has a value of \fB"12"\fR and not \fB"12.00"\fR. +.PP +.I Gawk +performs comparisons as follows: +If two variables are numeric, they are compared numerically. +If one value is numeric and the other has a string value that is a +``numeric string,'' then comparisons are also done numerically. +Otherwise, the numeric value is converted to a string and a string +comparison is performed. +Two strings are compared, of course, as strings. +According to the \*(PX standard, even if two strings are +numeric strings, a numeric comparison is performed. However, this is +clearly incorrect, and +.I gawk +does not do this. 
+.PP +Uninitialized variables have the numeric value 0 and the string value "" +(the null, or empty, string). +.SH PATTERNS AND ACTIONS +AWK is a line oriented language. The pattern comes first, and then the +action. Action statements are enclosed in +.B { +and +.BR } . +Either the pattern may be missing, or the action may be missing, but, +of course, not both. If the pattern is missing, the action will be +executed for every single line of input. +A missing action is equivalent to +.RS +.PP +.B "{ print }" +.RE +.PP +which prints the entire line. +.PP +Comments begin with the ``#'' character, and continue until the +end of the line. +Blank lines may be used to separate statements. +Normally, a statement ends with a newline, however, this is not the +case for lines ending in +a ``,'', ``{'', ``?'', ``:'', ``&&'', or ``||''. +Lines ending in +.B do +or +.B else +also have their statements automatically continued on the following line. +In other cases, a line can be continued by ending it with a ``\e'', +in which case the newline will be ignored. +.PP +Multiple statements may +be put on one line by separating them with a ``;''. +This applies to both the statements within the action part of a +pattern-action pair (the usual case), +and to the pattern-action statements themselves. +.SS Patterns +AWK patterns may be one of the following: +.PP +.RS +.nf +.B BEGIN +.B END +.BI / "regular expression" / +.I "relational expression" +.IB pattern " && " pattern +.IB pattern " || " pattern +.IB pattern " ? " pattern " : " pattern +.BI ( pattern ) +.BI ! " pattern" +.IB pattern1 ", " pattern2 +.fi +.RE +.PP +.B BEGIN +and +.B END +are two special kinds of patterns which are not tested against +the input. +The action parts of all +.B BEGIN +patterns are merged as if all the statements had +been written in a single +.B BEGIN +block. They are executed before any +of the input is read. 
Similarly, all the +.B END +blocks are merged, +and executed when all the input is exhausted (or when an +.B exit +statement is executed). +.B BEGIN +and +.B END +patterns cannot be combined with other patterns in pattern expressions. +.B BEGIN +and +.B END +patterns cannot have missing action parts. +.PP +For +.BI / "regular expression" / +patterns, the associated statement is executed for each input line that matches +the regular expression. +Regular expressions are the same as those in +.IR egrep (1), +and are summarized below. +.PP +A +.I "relational expression" +may use any of the operators defined below in the section on actions. +These generally test whether certain fields match certain regular expressions. +.PP +The +.BR && , +.BR || , +and +.B ! +operators are logical AND, logical OR, and logical NOT, respectively, as in C. +They do short-circuit evaluation, also as in C, and are used for combining +more primitive pattern expressions. As in most languages, parentheses +may be used to change the order of evaluation. +.PP +The +.B ?\^: +operator is like the same operator in C. If the first pattern is true +then the pattern used for testing is the second pattern, otherwise it is +the third. Only one of the second and third patterns is evaluated. +.PP +The +.IB pattern1 ", " pattern2 +form of an expression is called a range pattern. +It matches all input records starting with a line that matches +.IR pattern1 , +and continuing until a record that matches +.IR pattern2 , +inclusive. It does not combine with any other sort of pattern expression. +.SS Regular Expressions +Regular expressions are the extended kind found in +.IR egrep . +They are composed of characters as follows: +.TP \w'\fB[^\fIabc...\fB]\fR'u+2n +.I c +matches the non-metacharacter +.IR c . +.TP +.I \ec +matches the literal character +.IR c . +.TP +.B . +matches any character except newline. +.TP +.B ^ +matches the beginning of a line or a string. 
+.TP +.B $ +matches the end of a line or a string. +.TP +.BI [ abc... ] +character class, matches any of the characters +.IR abc... . +.TP +.BI [^ abc... ] +negated character class, matches any character except +.I abc... +and newline. +.TP +.IB r1 | r2 +alternation: matches either +.I r1 +or +.IR r2 . +.TP +.I r1r2 +concatenation: matches +.IR r1 , +and then +.IR r2 . +.TP +.IB r + +matches one or more +.IR r 's. +.TP +.IB r * +matches zero or more +.IR r 's. +.TP +.IB r ? +matches zero or one +.IR r 's. +.TP +.BI ( r ) +grouping: matches +.IR r . +.PP +The escape sequences that are valid in string constants (see below) +are also legal in regular expressions. +.SS Actions +Action statements are enclosed in braces, +.B { +and +.BR } . +Action statements consist of the usual assignment, conditional, and looping +statements found in most languages. The operators, control statements, +and input/output statements +available are patterned after those in C. +.SS Operators +.PP +The operators in AWK, in order of increasing precedence, are +.PP +.TP "\w'\fB*= /= %= ^=\fR'u+1n" +.PD 0 +.B "= += \-=" +.TP +.PD +.B "*= /= %= ^=" +Assignment. Both absolute assignment +.BI ( var " = " value ) +and operator-assignment (the other forms) are supported. +.TP +.B ?: +The C conditional expression. This has the form +.IB expr1 " ? " expr2 " : " expr3\c +\&. If +.I expr1 +is true, the value of the expression is +.IR expr2 , +otherwise it is +.IR expr3 . +Only one of +.I expr2 +and +.I expr3 +is evaluated. +.TP +.B || +Logical OR. +.TP +.B && +Logical AND. +.TP +.B "~ !~" +Regular expression match, negated match. +.B NOTE: +Do not use a constant regular expression +.RB ( /foo/ ) +on the left-hand side of a +.B ~ +or +.BR !~ . +Only use one on the right-hand side. The expression +.BI "/foo/ ~ " exp +has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR. +This is usually +.I not +what was intended. 
+.TP +.PD 0 +.B "< >" +.TP +.PD 0 +.B "<= >=" +.TP +.PD +.B "!= ==" +The regular relational operators. +.TP +.I blank +String concatenation. +.TP +.B "+ \-" +Addition and subtraction. +.TP +.B "* / %" +Multiplication, division, and modulus. +.TP +.B "+ \- !" +Unary plus, unary minus, and logical negation. +.TP +.B ^ +Exponentiation (\fB**\fR may also be used, and \fB**=\fR for +the assignment operator). +.TP +.B "++ \-\^\-" +Increment and decrement, both prefix and postfix. +.TP +.B $ +Field reference. +.SS Control Statements +.PP +The control statements are +as follows: +.PP +.RS +.nf +\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR] +\fBwhile (\fIcondition\fB) \fIstatement \fR +\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR +\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR +\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR +\fBbreak\fR +\fBcontinue\fR +\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR +\fBexit\fR [ \fIexpression\fR ] +\fB{ \fIstatements \fB}\fR +.fi +.RE +.SS "I/O Statements" +.PP +The input/output statements are as follows: +.PP +.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n" +.BI close( filename ) +Close file (or pipe, see below). +.TP +.B getline +Set +.B $0 +from next input record; set +.BR NF , +.BR NR , +.BR FNR . +.TP +.BI "getline <" file +Set +.B $0 +from next record of +.IR file ; +set +.BR NF . +.TP +.BI getline " var" +Set +.I var +from next input record; set +.BR NR , +.BR FNR . +.TP +.BI getline " var" " <" file +Set +.I var +from next record of +.IR file . +.TP +.B next +Stop processing the current input record. The next input record +is read and processing starts over with the first pattern in the +AWK program. If the end of the input data is reached, the +.B END +block(s), if any, are executed. +.TP +.B "next file" +Stop processing the current input file. The next input record read +comes from the next input file. 
+.B FILENAME +is updated, +.B FNR +is reset to 1, and processing starts over with the first pattern in the +AWK program. If the end of the input data is reached, the +.B END +block(s), if any, are executed. +.TP +.B print +Prints the current record. +.TP +.BI print " expr-list" +Prints expressions. +.TP +.BI print " expr-list" " >" file +Prints expressions on +.IR file . +.TP +.BI printf " fmt, expr-list" +Format and print. +.TP +.BI printf " fmt, expr-list" " >" file +Format and print on +.IR file . +.TP +.BI system( cmd-line ) +Execute the command +.IR cmd-line , +and return the exit status. +(This may not be available on non-\*(PX systems.) +.PP +Other input/output redirections are also allowed. For +.B print +and +.BR printf , +.BI >> file +appends output to the +.IR file , +while +.BI | " command" +writes on a pipe. +In a similar fashion, +.IB command " | getline" +pipes into +.BR getline . +.BR Getline +will return 0 on end of file, and \-1 on an error. +.SS The \fIprintf\fP\^ Statement +.PP +The AWK versions of the +.B printf +statement and +.B sprintf() +function +(see below) +accept the following conversion specification formats: +.TP +.B %c +An \s-1ASCII\s+1 character. +If the argument used for +.B %c +is numeric, it is treated as a character and printed. +Otherwise, the argument is assumed to be a string, and only the first +character of that string is printed. +.TP +.B %d +A decimal number (the integer part). +.TP +.B %i +Just like +.BR %d . +.TP +.B %e +A floating point number of the form +.BR [\-]d.ddddddE[+\^\-]dd . +.TP +.B %f +A floating point number of the form +.BR [\-]ddd.dddddd . +.TP +.B %g +Use +.B e +or +.B f +conversion, whichever is shorter, with nonsignificant zeros suppressed. +.TP +.B %o +An unsigned octal number (again, an integer). +.TP +.B %s +A character string. +.TP +.B %x +An unsigned hexadecimal number (an integer). +.TP +.B %X +Like +.BR %x , +but using +.B ABCDEF +instead of +.BR abcdef . 
+.TP +.B %% +A single +.B % +character; no argument is converted. +.PP +There are optional, additional parameters that may lie between the +.B % +and the control letter: +.TP +.B \- +The expression should be left-justified within its field. +.TP +.I width +The field should be padded to this width. If the number has a leading +zero, then the field will be padded with zeros. +Otherwise it is padded with blanks. +.TP +.BI . prec +A number indicating the maximum width of strings or digits to the right +of the decimal point. +.PP +The dynamic +.I width +and +.I prec +capabilities of the \*(AN C +.B printf() +routines are supported. +A +.B * +in place of either the +.B width +or +.B prec +specifications will cause their values to be taken from +the argument list to +.B printf +or +.BR sprintf() . +.SS Special File Names +.PP +When doing I/O redirection from either +.B print +or +.B printf +into a file, +or via +.B getline +from a file, +.I gawk +recognizes certain special filenames internally. These filenames +allow access to open file descriptors inherited from +.IR gawk 's +parent process (usually the shell). +Other special filenames provide access to information about the running +.B gawk +process. +The filenames are: +.TP \w'\fB/dev/stdout\fR'u+1n +.B /dev/pid +Reading this file returns the process ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/ppid +Reading this file returns the parent process ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/pgrpid +Reading this file returns the process group ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/user +Reading this file returns a single record terminated with a newline. +The fields are separated with blanks. 
+.B $1 +is the value of the +.IR getuid (2) +system call, +.B $2 +is the value of the +.IR geteuid (2) +system call, +.B $3 +is the value of the +.IR getgid (2) +system call, and +.B $4 +is the value of the +.IR getegid (2) +system call. +If there are any additional fields, they are the group IDs returned by +.IR getgroups (2). +(Multiple groups may not be supported on all systems.) +.TP +.B /dev/stdin +The standard input. +.TP +.B /dev/stdout +The standard output. +.TP +.B /dev/stderr +The standard error output. +.TP +.BI /dev/fd/\^ n +The file associated with the open file descriptor +.IR n . +.PP +These are particularly useful for error messages. For example: +.PP +.RS +.ft B +print "You blew it!" > "/dev/stderr" +.ft R +.RE +.PP +whereas you would otherwise have to use +.PP +.RS +.ft B +print "You blew it!" | "cat 1>&2" +.ft R +.RE +.PP +These file names may also be used on the command line to name data files. +.SS Numeric Functions +.PP +AWK has the following pre-defined arithmetic functions: +.PP +.TP \w'\fBsrand(\^\fIexpr\^\fB)\fR'u+1n +.BI atan2( y , " x" ) +returns the arctangent of +.I y/x +in radians. +.TP +.BI cos( expr ) +returns the cosine in radians. +.TP +.BI exp( expr ) +the exponential function. +.TP +.BI int( expr ) +truncates to integer. +.TP +.BI log( expr ) +the natural logarithm function. +.TP +.B rand() +returns a random number between 0 and 1. +.TP +.BI sin( expr ) +returns the sine in radians. +.TP +.BI sqrt( expr ) +the square root function. +.TP +.BI srand( expr ) +use +.I expr +as a new seed for the random number generator. If no +.I expr +is provided, the time of day will be used. +The return value is the previous seed for the random +number generator. 
+.SS String Functions +.PP +AWK has the following pre-defined string functions: +.PP +.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" +\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR +for each substring matching the regular expression +.I r +in the string +.IR t , +substitute the string +.IR s , +and return the number of substitutions. +If +.I t +is not supplied, use +.BR $0 . +.TP +.BI index( s , " t" ) +returns the index of the string +.I t +in the string +.IR s , +or 0 if +.I t +is not present. +.TP +.BI length( s ) +returns the length of the string +.IR s , +or the length of +.B $0 +if +.I s +is not supplied. +.TP +.BI match( s , " r" ) +returns the position in +.I s +where the regular expression +.I r +occurs, or 0 if +.I r +is not present, and sets the values of +.B RSTART +and +.BR RLENGTH . +.TP +\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR +splits the string +.I s +into the array +.I a +on the regular expression +.IR r , +and returns the number of fields. If +.I r +is omitted, +.B FS +is used instead. +.TP +.BI sprintf( fmt , " expr-list" ) +prints +.I expr-list +according to +.IR fmt , +and returns the resulting string. +.TP +\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR +just like +.BR gsub() , +but only the first matching substring is replaced. +.TP +\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR +returns the +.IR n -character +substring of +.I s +starting at +.IR i . +If +.I n +is omitted, the rest of +.I s +is used. +.TP +.BI tolower( str ) +returns a copy of the string +.IR str , +with all the upper-case characters in +.I str +translated to their corresponding lower-case counterparts. +Non-alphabetic characters are left unchanged. +.TP +.BI toupper( str ) +returns a copy of the string +.IR str , +with all the lower-case characters in +.I str +translated to their corresponding upper-case counterparts. +Non-alphabetic characters are left unchanged. 
+.SS Time Functions +.PP +Since one of the primary uses of AWK programs is processing log files +that contain time stamp information, +.I gawk +provides the following two functions for obtaining time stamps and +formatting them. +.PP +.TP "\w'\fBsystime()\fR'u+1n" +.B systime() +returns the current time of day as the number of seconds since the Epoch +(Midnight UTC, January 1, 1970 on \*(PX systems). +.TP +\fBstrftime(\fIformat\fB, \fItimestamp\fB)\fR +formats +.I timestamp +according to the specification in +.IR format . +The +.I timestamp +should be of the same form as returned by +.BR systime() . +If +.I timestamp +is missing, the current time of day is used. +See the specification for the +.B strftime() +function in \*(AN C for the format conversions that are +guaranteed to be available. +A public-domain version of +.IR strftime (3) +and a man page for it are shipped with +.IR gawk ; +if that version was used to build +.IR gawk , +then all of the conversions described in that man page are available to +.IR gawk . +.SS String Constants +.PP +String constants in AWK are sequences of characters enclosed +between double quotes (\fB"\fR). Within strings, certain +.I "escape sequences" +are recognized, as in C. These are: +.PP +.TP \w'\fB\e\^\fIddd\fR'u+1n +.B \e\e +A literal backslash. +.TP +.B \ea +The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character. +.TP +.B \eb +backspace. +.TP +.B \ef +form-feed. +.TP +.B \en +new line. +.TP +.B \er +carriage return. +.TP +.B \et +horizontal tab. +.TP +.B \ev +vertical tab. +.TP +.BI \ex "\^hex digits" +The character represented by the string of hexadecimal digits following +the +.BR \ex . +As in \*(AN C, all following hexadecimal digits are considered part of +the escape sequence. +(This feature should tell us something about language design by committee.) +E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. 
+.TP +.BI \e ddd +The character represented by the 1-, 2-, or 3-digit sequence of octal +digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. +.TP +.BI \e c +The literal character +.IR c\^ . +.PP +The escape sequences may also be used inside constant regular expressions +(e.g., +.B "/[\ \et\ef\en\er\ev]/" +matches whitespace characters). +.SH FUNCTIONS +Functions in AWK are defined as follows: +.PP +.RS +\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR +.RE +.PP +Functions are executed when called from within the action parts of regular +pattern-action statements. Actual parameters supplied in the function +call are used to instantiate the formal parameters declared in the function. +Arrays are passed by reference, other variables are passed by value. +.PP +Since functions were not originally part of the AWK language, the provision +for local variables is rather clumsy: They are declared as extra parameters +in the parameter list. The convention is to separate local variables from +real parameters by extra spaces in the parameter list. For example: +.PP +.RS +.ft B +.nf +function f(p, q, a, b) { # a & b are local + ..... } + +/abc/ { ... ; f(1, 2) ; ... } +.fi +.ft R +.RE +.PP +The left parenthesis in a function call is required +to immediately follow the function name, +without any intervening white space. +This is to avoid a syntactic ambiguity with the concatenation operator. +This restriction does not apply to the built-in functions listed above. +.PP +Functions may call each other and may be recursive. +Function parameters used as local variables are initialized +to the null string and the number zero upon function invocation. +.PP +The word +.B func +may be used in place of +.BR function . 
+.SH EXAMPLES +.nf +Print and sort the login names of all users: + +.ft B + BEGIN { FS = ":" } + { print $1 | "sort" } + +.ft R +Count lines in a file: + +.ft B + { nlines++ } + END { print nlines } + +.ft R +Precede each line by its number in the file: + +.ft B + { print FNR, $0 } + +.ft R +Concatenate and line number (a variation on a theme): + +.ft B + { print NR, $0 } +.ft R +.fi +.SH SEE ALSO +.IR egrep (1) +.PP +.IR "The AWK Programming Language" , +Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger, +Addison-Wesley, 1988. ISBN 0-201-07981-X. +.PP +.IR "The GAWK Manual" , +Edition 0.15, published by the Free Software Foundation, 1993. +.SH POSIX COMPATIBILITY +A primary goal for +.I gawk +is compatibility with the \*(PX standard, as well as with the +latest version of \*(UX +.IR awk . +To this end, +.I gawk +incorporates the following user visible +features which are not described in the AWK book, +but are part of +.I awk +in System V Release 4, and are in the \*(PX standard. +.PP +The +.B \-v +option for assigning variables before program execution starts is new. +The book indicates that command line variable assignment happens when +.I awk +would otherwise open the argument as a file, which is after the +.B BEGIN +block is executed. However, in earlier implementations, when such an +assignment appeared before any file names, the assignment would happen +.I before +the +.B BEGIN +block was run. Applications came to depend on this ``feature.'' +When +.I awk +was changed to match its documentation, this option was added to +accommodate applications that depended upon the old behavior. +(This feature was agreed upon by both the AT&T and GNU developers.) +.PP +The +.B \-W +option for implementation specific features is from the \*(PX standard. +.PP +When processing arguments, +.I gawk +uses the special option ``\fB\-\^\-\fP'' to signal the end of +arguments, and warns about, but otherwise ignores, undefined options. 
+.PP +The AWK book does not define the return value of +.BR srand() . +The System V Release 4 version of \*(UX +.I awk +(and the \*(PX standard) +has it return the seed it was using, to allow keeping track +of random number sequences. Therefore +.B srand() +in +.I gawk +also returns its current seed. +.PP +Other new features are: +The use of multiple +.B \-f +options (from MKS +.IR awk ); +the +.B ENVIRON +array; the +.BR \ea , +and +.BR \ev +escape sequences (done originally in +.I gawk +and fed back into AT&T's); the +.B tolower() +and +.B toupper() +built-in functions (from AT&T); and the \*(AN C conversion specifications in +.B printf +(done first in AT&T's version). +.SH GNU EXTENSIONS +.I Gawk +has some extensions to \*(PX +.IR awk . +They are described in this section. All the extensions described here +can be disabled by +invoking +.I gawk +with the +.B "\-W compat" +option. +.PP +The following features of +.I gawk +are not available in +\*(PX +.IR awk . +.RS +.TP \w'\(bu'u+1n +\(bu +The +.B \ex +escape sequence. +.TP +\(bu +The +.B systime() +and +.B strftime() +functions. +.TP +\(bu +The special file names available for I/O redirection are not recognized. +.TP +\(bu +The +.B ARGIND +and +.B ERRNO +variables are not special. +.TP +\(bu +The +.B IGNORECASE +variable and its side-effects are not available. +.TP +\(bu +The +.B FIELDWIDTHS +variable and fixed width field splitting. +.TP +\(bu +No path search is performed for files named via the +.B \-f +option. Therefore the +.B AWKPATH +environment variable is not special. +.TP +\(bu +The use of +.B "next file" +to abandon processing of the current input file. +.RE +.PP +The AWK book does not define the return value of the +.B close() +function. +.IR Gawk\^ 's +.B close() +returns the value from +.IR fclose (3), +or +.IR pclose (3), +when closing a file or pipe, respectively. 
+.PP +When +.I gawk +is invoked with the +.B "\-W compat" +option, +if the +.I fs +argument to the +.B \-F +option is ``t'', then +.B FS +will be set to the tab character. +Since this is a rather ugly special case, it is not the default behavior. +This behavior also does not occur if +.B \-Wposix +has been specified. +.ig +.PP +If +.I gawk +was compiled for debugging, it will +accept the following additional options: +.TP +.PD 0 +.B \-Wparsedebug +.TP +.PD +.B \-\^\-parsedebug +Turn on +.IR yacc (1) +or +.IR bison (1) +debugging output during program parsing. +This option should only be of interest to the +.I gawk +maintainers, and may not even be compiled into +.IR gawk . +.. +.SH HISTORICAL FEATURES +There are two features of historical AWK implementations that +.I gawk +supports. +First, it is possible to call the +.B length() +built-in function not only with no argument, but even without parentheses! +Thus, +.RS +.PP +.ft B +a = length +.ft R +.RE +.PP +is the same as either of +.RS +.PP +.ft B +a = length() +.br +a = length($0) +.ft R +.RE +.PP +This feature is marked as ``deprecated'' in the \*(PX standard, and +.I gawk +will issue a warning about its use if +.B \-Wlint +is specified on the command line. +.PP +The other feature is the use of the +.B continue +statement outside the body of a +.BR while , +.BR for , +or +.B do +loop. Traditional AWK implementations have treated such usage as +equivalent to the +.B next +statement. +.I Gawk +will support this usage if +.B \-Wposix +has not been specified. +.SH BUGS +The +.B \-F +option is not necessary given the command line variable assignment feature; +it remains only for backwards compatibility. +.PP +If your system actually has support for +.B /dev/fd +and the associated +.BR /dev/stdin , +.BR /dev/stdout , +and +.B /dev/stderr +files, you may get different output from +.I gawk +than you would get on a system without those files. 
When +.I gawk +interprets these files internally, it synchronizes output to the standard +output with output to +.BR /dev/stdout , +while on a system with those files, the output is actually to different +open files. +Caveat Emptor. +.SH VERSION INFORMATION +This man page documents +.IR gawk , +version 2.15. +.PP +Starting with the 2.15 version of +.IR gawk , +the +.BR \-c , +.BR \-V , +.BR \-C , +.ig +.BR \-D , +.. +.BR \-a , +and +.B \-e +options of the 2.11 version are no longer recognized. +.SH AUTHORS +The original version of \*(UX +.I awk +was designed and implemented by Alfred Aho, +Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan +continues to maintain and enhance it. +.PP +Paul Rubin and Jay Fenlason, +of the Free Software Foundation, wrote +.IR gawk , +to be compatible with the original version of +.I awk +distributed in Seventh Edition \*(UX. +John Woods contributed a number of bug fixes. +David Trueman, with contributions +from Arnold Robbins, made +.I gawk +compatible with the new version of \*(UX +.IR awk . +.PP +The initial DOS port was done by Conrad Kwok and Scott Garfinkle. +Scott Deifik is the current DOS maintainer. Pat Rankin did the +port to VMS, and Michal Jaegermann did the port to the Atari ST. +.SH ACKNOWLEDGEMENTS +Brian Kernighan of Bell Labs +provided valuable assistance during testing and debugging. +We thank him. diff --git a/gnu/usr.bin/awk/awk.h b/gnu/usr.bin/awk/awk.h new file mode 100644 index 0000000..ca3997f --- /dev/null +++ b/gnu/usr.bin/awk/awk.h @@ -0,0 +1,763 @@ +/* + * awk.h -- Definitions for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* ------------------------------ Includes ------------------------------ */ +#include <stdio.h> +#include <limits.h> +#include <ctype.h> +#include <setjmp.h> +#include <varargs.h> +#include <time.h> +#include <errno.h> +#if !defined(errno) && !defined(MSDOS) +extern int errno; +#endif +#ifdef __GNU_LIBRARY__ +#ifndef linux +#include <signum.h> +#endif +#endif + +/* ----------------- System dependencies (with more includes) -----------*/ + +#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC)) +#include <sys/types.h> +#include <sys/stat.h> +#else /* VMS w/ VAXC or DECC */ +#include <types.h> +#include <stat.h> +#include <file.h> /* avoid <fcntl.h> in io.c */ +#endif + +#include <signal.h> + +#include "config.h" + +#ifdef __STDC__ +#define P(s) s +#define MALLOC_ARG_T size_t +#else +#define P(s) () +#define MALLOC_ARG_T unsigned +#define volatile +#define const +#endif + +#ifndef SIGTYPE +#define SIGTYPE void +#endif + +#ifdef SIZE_T_MISSING +typedef unsigned int size_t; +#endif + +#ifndef SZTC +#define SZTC +#define INTC +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#include <string.h> +#ifdef NeXT +#include <libc.h> +#undef atof +#else +#if defined(atarist) || defined(VMS) +#include <unixlib.h> +#else /* atarist || VMS */ +#ifndef MSDOS +#include 
<unistd.h> +#endif /* MSDOS */ +#endif /* atarist || VMS */ +#endif /* Next */ +#else /* STDC_HEADERS */ +#include "protos.h" +#endif /* STDC_HEADERS */ + +#if defined(ultrix) && !defined(Ultrix41) +extern char * getenv P((char *name)); +extern double atof P((char *s)); +#endif + +#ifndef __GNUC__ +#ifdef sparc +/* nasty nasty SunOS-ism */ +#include <alloca.h> +#ifdef lint +extern char *alloca(); +#endif +#else /* not sparc */ +#if !defined(alloca) && !defined(ALLOCA_PROTO) +extern char *alloca(); +#endif +#endif /* sparc */ +#endif /* __GNUC__ */ + +#ifdef HAVE_UNDERSCORE_SETJMP +/* nasty nasty berkelixm */ +#define setjmp _setjmp +#define longjmp _longjmp +#endif + +/* + * if you don't have vprintf, try this and cross your fingers. + */ +#if defined(VPRINTF_MISSING) +#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp)) +#endif + +#ifdef VMS +/* some macros to redirect to code in vms/vms_misc.c */ +#define exit vms_exit +#define open vms_open +#define strerror vms_strerror +#define strdup vms_strdup +extern void exit P((int)); +extern int open P((const char *,int,...)); +extern char *strerror P((int)); +extern char *strdup P((const char *str)); +extern int vms_devopen P((const char *,int)); +# ifndef NO_TTY_FWRITE +#define fwrite tty_fwrite +#define fclose tty_fclose +extern size_t fwrite P((const void *,size_t,size_t,FILE *)); +extern int fclose P((FILE *)); +# endif +extern FILE *popen P((const char *,const char *)); +extern int pclose P((FILE *)); +extern void vms_arg_fixup P((int *,char ***)); +/* some things not in STDC_HEADERS */ +extern int gnu_strftime P((char *,size_t,const char *,const struct tm *)); +extern int unlink P((const char *)); +extern int getopt P((int,char **,char *)); +extern int isatty P((int)); +#ifndef fileno +extern int fileno P((FILE *)); +#endif +extern int close(), dup(), dup2(), fstat(), read(), stat(); +#endif /*VMS*/ + +#ifdef MSDOS +#include <io.h> +extern FILE *popen P((char *, char *)); +extern int pclose P((FILE *)); 
+#endif + +#define GNU_REGEX +#ifdef GNU_REGEX +#include "regex.h" +#include "dfa.h" +typedef struct Regexp { + struct re_pattern_buffer pat; + struct re_registers regs; + struct regexp dfareg; + int dfa; +} Regexp; +#define RESTART(rp,s) (rp)->regs.start[0] +#define REEND(rp,s) (rp)->regs.end[0] +#else /* GNU_REGEX */ +#endif /* GNU_REGEX */ + +#ifdef atarist +#define read _text_read /* we do not want all these CR's to mess our input */ +extern int _text_read (int, char *, int); +#endif + +#ifndef DEFPATH +#define DEFPATH ".:/usr/local/lib/awk:/usr/lib/awk" +#endif + +#ifndef ENVSEP +#define ENVSEP ':' +#endif + +/* ------------------ Constants, Structures, Typedefs ------------------ */ +#define AWKNUM double + +typedef enum { + /* illegal entry == 0 */ + Node_illegal, + + /* binary operators lnode and rnode are the expressions to work on */ + Node_times, + Node_quotient, + Node_mod, + Node_plus, + Node_minus, + Node_cond_pair, /* conditional pair (see Node_line_range) */ + Node_subscript, + Node_concat, + Node_exp, + + /* unary operators subnode is the expression to work on */ +/*10*/ Node_preincrement, + Node_predecrement, + Node_postincrement, + Node_postdecrement, + Node_unary_minus, + Node_field_spec, + + /* assignments lnode is the var to assign to, rnode is the exp */ + Node_assign, + Node_assign_times, + Node_assign_quotient, + Node_assign_mod, +/*20*/ Node_assign_plus, + Node_assign_minus, + Node_assign_exp, + + /* boolean binaries lnode and rnode are expressions */ + Node_and, + Node_or, + + /* binary relationals compares lnode and rnode */ + Node_equal, + Node_notequal, + Node_less, + Node_greater, + Node_leq, +/*30*/ Node_geq, + Node_match, + Node_nomatch, + + /* unary relationals works on subnode */ + Node_not, + + /* program structures */ + Node_rule_list, /* lnode is a rule, rnode is rest of list */ + Node_rule_node, /* lnode is pattern, rnode is statement */ + Node_statement_list, /* lnode is statement, rnode is more list */ + Node_if_branches, /* 
lnode is to run on true, rnode on false */ + Node_expression_list, /* lnode is an exp, rnode is more list */ + Node_param_list, /* lnode is a variable, rnode is more list */ + + /* keywords */ +/*40*/ Node_K_if, /* lnode is conditonal, rnode is if_branches */ + Node_K_while, /* lnode is condtional, rnode is stuff to run */ + Node_K_for, /* lnode is for_struct, rnode is stuff to run */ + Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */ + Node_K_break, /* no subs */ + Node_K_continue, /* no stuff */ + Node_K_print, /* lnode is exp_list, rnode is redirect */ + Node_K_printf, /* lnode is exp_list, rnode is redirect */ + Node_K_next, /* no subs */ + Node_K_exit, /* subnode is return value, or NULL */ +/*50*/ Node_K_do, /* lnode is conditional, rnode stuff to run */ + Node_K_return, + Node_K_delete, + Node_K_getline, + Node_K_function, /* lnode is statement list, rnode is params */ + + /* I/O redirection for print statements */ + Node_redirect_output, /* subnode is where to redirect */ + Node_redirect_append, /* subnode is where to redirect */ + Node_redirect_pipe, /* subnode is where to redirect */ + Node_redirect_pipein, /* subnode is where to redirect */ + Node_redirect_input, /* subnode is where to redirect */ + + /* Variables */ +/*60*/ Node_var, /* rnode is value, lnode is array stuff */ + Node_var_array, /* array is ptr to elements, asize num of + * eles */ + Node_val, /* node is a value - type in flags */ + + /* Builtins subnode is explist to work on, proc is func to call */ + Node_builtin, + + /* + * pattern: conditional ',' conditional ; lnode of Node_line_range + * is the two conditionals (Node_cond_pair), other word (rnode place) + * is a flag indicating whether or not this range has been entered. + */ + Node_line_range, + + /* + * boolean test of membership in array lnode is string-valued + * expression rnode is array name + */ + Node_in_array, + + Node_func, /* lnode is param. 
list, rnode is body */ + Node_func_call, /* lnode is name, rnode is argument list */ + + Node_cond_exp, /* lnode is conditonal, rnode is if_branches */ + Node_regex, +/*70*/ Node_hashnode, + Node_ahash, + Node_NF, + Node_NR, + Node_FNR, + Node_FS, + Node_RS, + Node_FIELDWIDTHS, + Node_IGNORECASE, + Node_OFS, + Node_ORS, + Node_OFMT, + Node_CONVFMT, + Node_K_nextfile +} NODETYPE; + +/* + * NOTE - this struct is a rather kludgey -- it is packed to minimize + * space usage, at the expense of cleanliness. Alter at own risk. + */ +typedef struct exp_node { + union { + struct { + union { + struct exp_node *lptr; + char *param_name; + } l; + union { + struct exp_node *rptr; + struct exp_node *(*pptr) (); + Regexp *preg; + struct for_loop_header *hd; + struct exp_node **av; + int r_ent; /* range entered */ + } r; + union { + char *name; + struct exp_node *extra; + } x; + short number; + unsigned char reflags; +# define CASE 1 +# define CONST 2 +# define FS_DFLT 4 + } nodep; + struct { + AWKNUM fltnum; /* this is here for optimal packing of + * the structure on many machines + */ + char *sp; + size_t slen; + unsigned char sref; + char idx; + } val; + struct { + struct exp_node *next; + char *name; + int length; + struct exp_node *value; + } hash; +#define hnext sub.hash.next +#define hname sub.hash.name +#define hlength sub.hash.length +#define hvalue sub.hash.value + struct { + struct exp_node *next; + struct exp_node *name; + struct exp_node *value; + } ahash; +#define ahnext sub.ahash.next +#define ahname sub.ahash.name +#define ahvalue sub.ahash.value + } sub; + NODETYPE type; + unsigned short flags; +# define MALLOC 1 /* can be free'd */ +# define TEMP 2 /* should be free'd */ +# define PERM 4 /* can't be free'd */ +# define STRING 8 /* assigned as string */ +# define STR 16 /* string value is current */ +# define NUM 32 /* numeric value is current */ +# define NUMBER 64 /* assigned as number */ +# define MAYBE_NUM 128 /* user input: if NUMERIC then + * a NUMBER + */ + 
char *vname; /* variable's name */ +} NODE; + +#define lnode sub.nodep.l.lptr +#define nextp sub.nodep.l.lptr +#define rnode sub.nodep.r.rptr +#define source_file sub.nodep.x.name +#define source_line sub.nodep.number +#define param_cnt sub.nodep.number +#define param sub.nodep.l.param_name + +#define subnode lnode +#define proc sub.nodep.r.pptr + +#define re_reg sub.nodep.r.preg +#define re_flags sub.nodep.reflags +#define re_text lnode +#define re_exp sub.nodep.x.extra +#define re_cnt sub.nodep.number + +#define forsub lnode +#define forloop rnode->sub.nodep.r.hd + +#define stptr sub.val.sp +#define stlen sub.val.slen +#define stref sub.val.sref +#define stfmt sub.val.idx + +#define numbr sub.val.fltnum + +#define var_value lnode +#define var_array sub.nodep.r.av + +#define condpair lnode +#define triggered sub.nodep.r.r_ent + +#ifdef DONTDEF +int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381}; +#endif +/* a quick profile suggests that the following is a good value */ +#define HASHSIZE 127 + +typedef struct for_loop_header { + NODE *init; + NODE *cond; + NODE *incr; +} FOR_LOOP_HEADER; + +/* for "for(iggy in foo) {" */ +struct search { + NODE **arr_ptr; + NODE **arr_end; + NODE *bucket; + NODE *retval; +}; + +/* for faster input, bypass stdio */ +typedef struct iobuf { + int fd; + char *buf; + char *off; + char *end; + size_t size; /* this will be determined by an fstat() call */ + int cnt; + long secsiz; + int flag; +# define IOP_IS_TTY 1 +# define IOP_IS_INTERNAL 2 +# define IOP_NO_FREE 4 +} IOBUF; + +typedef void (*Func_ptr)(); + +/* + * structure used to dynamically maintain a linked-list of open files/pipes + */ +struct redirect { + unsigned int flag; +# define RED_FILE 1 +# define RED_PIPE 2 +# define RED_READ 4 +# define RED_WRITE 8 +# define RED_APPEND 16 +# define RED_NOBUF 32 +# define RED_USED 64 +# define RED_EOF 128 + char *value; + FILE *fp; + IOBUF *iop; + int pid; + int status; + struct redirect *prev; + struct redirect *next; 
+}; + +/* structure for our source, either a command line string or a source file */ +struct src { + enum srctype { CMDLINE = 1, SOURCEFILE } stype; + char *val; +}; + +/* longjmp return codes, must be nonzero */ +/* Continue means either for loop/while continue, or next input record */ +#define TAG_CONTINUE 1 +/* Break means either for/while break, or stop reading input */ +#define TAG_BREAK 2 +/* Return means return from a function call; leave value in ret_node */ +#define TAG_RETURN 3 + +#define HUGE INT_MAX + +/* -------------------------- External variables -------------------------- */ +/* gawk builtin variables */ +extern int NF; +extern int NR; +extern int FNR; +extern int IGNORECASE; +extern char *RS; +extern char *OFS; +extern int OFSlen; +extern char *ORS; +extern int ORSlen; +extern char *OFMT; +extern char *CONVFMT; +extern int CONVFMTidx; +extern int OFMTidx; +extern NODE *FS_node, *NF_node, *RS_node, *NR_node; +extern NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node; +extern NODE *CONVFMT_node; +extern NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node; +extern NODE *IGNORECASE_node; +extern NODE *FIELDWIDTHS_node; + +extern NODE **stack_ptr; +extern NODE *Nnull_string; +extern NODE **fields_arr; +extern int sourceline; +extern char *source; +extern NODE *expression_value; + +extern NODE *_t; /* used as temporary in tree_eval */ + +extern const char *myname; + +extern NODE *nextfree; +extern int field0_valid; +extern int do_unix; +extern int do_posix; +extern int do_lint; +extern int in_begin_rule; +extern int in_end_rule; + +/* ------------------------- Pseudo-functions ------------------------- */ + +#define is_identchar(c) (isalnum(c) || (c) == '_') + + +#ifndef MPROF +#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\ + else n = more_nodes() +#define freenode(n) ((n)->nextp = nextfree, nextfree = (n)) +#else +#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode") +#define freenode(n) free(n) +#endif + 
+#ifdef DEBUG +#define tree_eval(t) r_tree_eval(t) +#else +#define tree_eval(t) (_t = (t),(_t) == NULL ? Nnull_string : \ + ((_t)->type == Node_val ? (_t) : \ + ((_t)->type == Node_var ? (_t)->var_value : \ + ((_t)->type == Node_param_list ? \ + (stack_ptr[(_t)->param_cnt])->var_value : \ + r_tree_eval((_t)))))) +#endif + +#define make_number(x) mk_number((x), (MALLOC|NUM|NUMBER)) +#define tmp_number(x) mk_number((x), (MALLOC|TEMP|NUM|NUMBER)) + +#define free_temp(n) do {if ((n)->flags&TEMP) { unref(n); }} while (0) +#define make_string(s,l) make_str_node((s), SZTC (l),0) +#define SCAN 1 +#define ALREADY_MALLOCED 2 + +#define cant_happen() fatal("internal error line %d, file: %s", \ + __LINE__, __FILE__); + +#if defined(__STDC__) && !defined(NO_TOKEN_PASTING) +#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), #var, strerror(errno)),0)) +#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ + (MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), #var, strerror(errno)),0)) +#else /* __STDC__ */ +#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), "var", strerror(errno)),0)) +#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ + (MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), "var", strerror(errno)),0)) +#endif /* __STDC__ */ + +#ifdef DEBUG +#define force_number r_force_number +#define force_string r_force_string +#else /* not DEBUG */ +#ifdef lint +extern AWKNUM force_number(); +#endif +#ifdef MSDOS +extern double _msc51bug; +#define force_number(n) (_msc51bug=(_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t))) +#else /* not MSDOS */ +#define force_number(n) (_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t)) +#endif /* MSDOS */ +#define force_string(s) (_t = (s),(_t->flags & STR) ? 
_t : r_force_string(_t)) +#endif /* not DEBUG */ + +#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0) +#define STREQN(a,b,n) ((n)&& *(a)== *(b) && strncmp((a), (b), SZTC (n)) == 0) + +/* ------------- Function prototypes or defs (as appropriate) ------------- */ + +/* array.c */ +extern NODE *concat_exp P((NODE *tree)); +extern void assoc_clear P((NODE *symbol)); +extern unsigned int hash P((char *s, int len)); +extern int in_array P((NODE *symbol, NODE *subs)); +extern NODE **assoc_lookup P((NODE *symbol, NODE *subs)); +extern void do_delete P((NODE *symbol, NODE *tree)); +extern void assoc_scan P((NODE *symbol, struct search *lookat)); +extern void assoc_next P((struct search *lookat)); +/* awk.tab.c */ +extern char *tokexpand P((void)); +extern char nextc P((void)); +extern NODE *node P((NODE *left, NODETYPE op, NODE *right)); +extern NODE *install P((char *name, NODE *value)); +extern NODE *lookup P((char *name)); +extern NODE *variable P((char *name, int can_free)); +extern int yyparse P((void)); +/* builtin.c */ +extern NODE *do_exp P((NODE *tree)); +extern NODE *do_index P((NODE *tree)); +extern NODE *do_int P((NODE *tree)); +extern NODE *do_length P((NODE *tree)); +extern NODE *do_log P((NODE *tree)); +extern NODE *do_sprintf P((NODE *tree)); +extern void do_printf P((NODE *tree)); +extern void print_simple P((NODE *tree, FILE *fp)); +extern NODE *do_sqrt P((NODE *tree)); +extern NODE *do_substr P((NODE *tree)); +extern NODE *do_strftime P((NODE *tree)); +extern NODE *do_systime P((NODE *tree)); +extern NODE *do_system P((NODE *tree)); +extern void do_print P((NODE *tree)); +extern NODE *do_tolower P((NODE *tree)); +extern NODE *do_toupper P((NODE *tree)); +extern NODE *do_atan2 P((NODE *tree)); +extern NODE *do_sin P((NODE *tree)); +extern NODE *do_cos P((NODE *tree)); +extern NODE *do_rand P((NODE *tree)); +extern NODE *do_srand P((NODE *tree)); +extern NODE *do_match P((NODE *tree)); +extern NODE *do_gsub P((NODE *tree)); +extern NODE *do_sub 
P((NODE *tree)); +/* eval.c */ +extern int interpret P((NODE *volatile tree)); +extern NODE *r_tree_eval P((NODE *tree)); +extern int cmp_nodes P((NODE *t1, NODE *t2)); +extern NODE **get_lhs P((NODE *ptr, Func_ptr *assign)); +extern void set_IGNORECASE P((void)); +void set_OFS P((void)); +void set_ORS P((void)); +void set_OFMT P((void)); +void set_CONVFMT P((void)); +/* field.c */ +extern void init_fields P((void)); +extern void set_record P((char *buf, int cnt, int freeold)); +extern void reset_record P((void)); +extern void set_NF P((void)); +extern NODE **get_field P((int num, Func_ptr *assign)); +extern NODE *do_split P((NODE *tree)); +extern void set_FS P((void)); +extern void set_RS P((void)); +extern void set_FIELDWIDTHS P((void)); +/* io.c */ +extern void set_FNR P((void)); +extern void set_NR P((void)); +extern void do_input P((void)); +extern struct redirect *redirect P((NODE *tree, int *errflg)); +extern NODE *do_close P((NODE *tree)); +extern int flush_io P((void)); +extern int close_io P((void)); +extern int devopen P((char *name, char *mode)); +extern int pathopen P((char *file)); +extern NODE *do_getline P((NODE *tree)); +extern void do_nextfile P((void)); +/* iop.c */ +extern int optimal_bufsize P((int fd)); +extern IOBUF *iop_alloc P((int fd)); +extern int get_a_record P((char **out, IOBUF *iop, int rs, int *errcode)); +/* main.c */ +extern int main P((int argc, char **argv)); +extern Regexp *mk_re_parse P((char *s, int ignorecase)); +extern void load_environ P((void)); +extern char *arg_assign P((char *arg)); +extern SIGTYPE catchsig P((int sig, int code)); +/* msg.c */ +#ifdef MSDOS +extern void err P((char *s, char *emsg, char *va_list, ...)); +extern void msg P((char *va_alist, ...)); +extern void warning P((char *va_alist, ...)); +extern void fatal P((char *va_alist, ...)); +#else +extern void err (); +extern void msg (); +extern void warning (); +extern void fatal (); +#endif +/* node.c */ +extern AWKNUM r_force_number P((NODE *n)); +extern 
NODE *r_force_string P((NODE *s)); +extern NODE *dupnode P((NODE *n)); +extern NODE *mk_number P((AWKNUM x, unsigned int flags)); +extern NODE *make_str_node P((char *s, size_t len, int scan )); +extern NODE *tmp_string P((char *s, size_t len )); +extern NODE *more_nodes P((void)); +#ifdef DEBUG +extern void freenode P((NODE *it)); +#endif +extern void unref P((NODE *tmp)); +extern int parse_escape P((char **string_ptr)); +/* re.c */ +extern Regexp *make_regexp P((char *s, int len, int ignorecase, int dfa)); +extern int research P((Regexp *rp, char *str, int start, int len, int need_start)); +extern void refree P((Regexp *rp)); +extern void reg_error P((const char *s)); +extern Regexp *re_update P((NODE *t)); +extern void resyntax P((int syntax)); +extern void resetup P((void)); + +/* strcase.c */ +extern int strcasecmp P((const char *s1, const char *s2)); +extern int strncasecmp P((const char *s1, const char *s2, register size_t n)); + +#ifdef atarist +/* atari/tmpnam.c */ +extern char *tmpnam P((char *buf)); +extern char *tempnam P((const char *path, const char *base)); +#endif + +/* Figure out what '\a' really is. */ +#ifdef __STDC__ +#define BELL '\a' /* sure makes life easy, don't it? */ +#else +# if 'z' - 'a' == 25 /* ascii */ +# if 'a' != 97 /* machine is dumb enough to use mark parity */ +# define BELL '\207' +# else +# define BELL '\07' +# endif +# else +# define BELL '\057' +# endif +#endif + +extern char casetable[]; /* for case-independent regexp matching */ diff --git a/gnu/usr.bin/awk/awk.y b/gnu/usr.bin/awk/awk.y new file mode 100644 index 0000000..6e87f1c --- /dev/null +++ b/gnu/usr.bin/awk/awk.y @@ -0,0 +1,1804 @@ +/* + * awk.y --- yacc/bison parser + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language.
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +%{ +#ifdef DEBUG +#define YYDEBUG 12 +#endif + +#include "awk.h" + +static void yyerror (); /* va_alist */ +static char *get_src_buf P((void)); +static int yylex P((void)); +static NODE *node_common P((NODETYPE op)); +static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); +static NODE *mkrangenode P((NODE *cpair)); +static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); +static NODE *append_right P((NODE *list, NODE *new)); +static void func_install P((NODE *params, NODE *def)); +static void pop_var P((NODE *np, int freeit)); +static void pop_params P((NODE *params)); +static NODE *make_param P((char *name)); +static NODE *mk_rexp P((NODE *exp)); + +static int want_assign; /* lexical scanning kludge */ +static int want_regexp; /* lexical scanning kludge */ +static int can_return; /* lexical scanning kludge */ +static int io_allowed = 1; /* lexical scanning kludge */ +static char *lexptr; /* pointer to next char during parsing */ +static char *lexend; +static char *lexptr_begin; /* keep track of where we were for error msgs */ +static char *lexeme; /* beginning of lexeme for debugging */ +static char *thisline = NULL; +#define YYDEBUG_LEXER_TEXT (lexeme) +static int param_counter; +static char *tokstart = NULL; +static char *token = NULL; 
+static char *tokend; + +NODE *variables[HASHSIZE]; + +extern char *source; +extern int sourceline; +extern struct src *srcfiles; +extern int numfiles; +extern int errcount; +extern NODE *begin_block; +extern NODE *end_block; +%} + +%union { + long lval; + AWKNUM fval; + NODE *nodeval; + NODETYPE nodetypeval; + char *sval; + NODE *(*ptrval)(); +} + +%type <nodeval> function_prologue function_body +%type <nodeval> rexp exp start program rule simp_exp +%type <nodeval> non_post_simp_exp +%type <nodeval> pattern +%type <nodeval> action variable param_list +%type <nodeval> rexpression_list opt_rexpression_list +%type <nodeval> expression_list opt_expression_list +%type <nodeval> statements statement if_statement opt_param_list +%type <nodeval> opt_exp opt_variable regexp +%type <nodeval> input_redir output_redir +%type <nodetypeval> print +%type <sval> func_name +%type <lval> lex_builtin + +%token <sval> FUNC_CALL NAME REGEXP +%token <lval> ERROR +%token <nodeval> YNUMBER YSTRING +%token <nodetypeval> RELOP APPEND_OP +%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP +%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE +%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE +%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION +%token <nodetypeval> LEX_GETLINE +%token <nodetypeval> LEX_IN +%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT +%token <lval> LEX_BUILTIN LEX_LENGTH + +/* these are just yylval numbers */ + +/* Lowest to highest */ +%right ASSIGNOP +%right '?' ':' +%left LEX_OR +%left LEX_AND +%left LEX_GETLINE +%nonassoc LEX_IN +%left FUNC_CALL LEX_BUILTIN LEX_LENGTH +%nonassoc MATCHOP +%nonassoc RELOP '<' '>' '|' APPEND_OP +%left CONCAT_OP +%left YSTRING YNUMBER +%left '+' '-' +%left '*' '/' '%' +%right '!' 
UNARY +%right '^' +%left INCREMENT DECREMENT +%left '$' +%left '(' ')' +%% + +start + : opt_nls program opt_nls + { expression_value = $2; } + ; + +program + : rule + { + if ($1 != NULL) + $$ = $1; + else + $$ = NULL; + yyerrok; + } + | program rule + /* add the rule to the tail of list */ + { + if ($2 == NULL) + $$ = $1; + else if ($1 == NULL) + $$ = $2; + else { + if ($1->type != Node_rule_list) + $1 = node($1, Node_rule_list, + (NODE*)NULL); + $$ = append_right ($1, + node($2, Node_rule_list,(NODE *) NULL)); + } + yyerrok; + } + | error { $$ = NULL; } + | program error { $$ = NULL; } + ; + +rule + : LEX_BEGIN { io_allowed = 0; } + action + { + if (begin_block) { + if (begin_block->type != Node_rule_list) + begin_block = node(begin_block, Node_rule_list, + (NODE *)NULL); + (void) append_right (begin_block, node( + node((NODE *)NULL, Node_rule_node, $3), + Node_rule_list, (NODE *)NULL) ); + } else + begin_block = node((NODE *)NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = 1; + yyerrok; + } + | LEX_END { io_allowed = 0; } + action + { + if (end_block) { + if (end_block->type != Node_rule_list) + end_block = node(end_block, Node_rule_list, + (NODE *)NULL); + (void) append_right (end_block, node( + node((NODE *)NULL, Node_rule_node, $3), + Node_rule_list, (NODE *)NULL)); + } else + end_block = node((NODE *)NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = 1; + yyerrok; + } + | LEX_BEGIN statement_term + { + warning("BEGIN blocks must have an action part"); + errcount++; + yyerrok; + } + | LEX_END statement_term + { + warning("END blocks must have an action part"); + errcount++; + yyerrok; + } + | pattern action + { $$ = node ($1, Node_rule_node, $2); yyerrok; } + | action + { $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; } + | pattern statement_term + { + $$ = node ($1, + Node_rule_node, + node(node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL), + Node_K_print, + (NODE *) NULL)); + yyerrok; 
+ } + | function_prologue function_body + { + func_install($1, $2); + $$ = NULL; + yyerrok; + } + ; + +func_name + : NAME + { $$ = $1; } + | FUNC_CALL + { $$ = $1; } + | lex_builtin + { + yyerror("%s() is a built-in function, it cannot be redefined", + tokstart); + errcount++; + /* yyerrok; */ + } + ; + +lex_builtin + : LEX_BUILTIN + | LEX_LENGTH + ; + +function_prologue + : LEX_FUNCTION + { + param_counter = 0; + } + func_name '(' opt_param_list r_paren opt_nls + { + $$ = append_right(make_param($3), $5); + can_return = 1; + } + ; + +function_body + : l_brace statements r_brace opt_semi + { + $$ = $2; + can_return = 0; + } + ; + + +pattern + : exp + { $$ = $1; } + | exp comma exp + { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } + ; + +regexp + /* + * In this rule, want_regexp tells yylex that the next thing + * is a regexp so it should read up to the closing slash. + */ + : '/' + { ++want_regexp; } + REGEXP '/' + { + NODE *n; + int len; + + getnode(n); + n->type = Node_regex; + len = strlen($3); + n->re_exp = make_string($3, len); + n->re_reg = make_regexp($3, len, 0, 1); + n->re_text = NULL; + n->re_flags = CONST; + n->re_cnt = 1; + $$ = n; + } + ; + +action + : l_brace statements r_brace opt_semi opt_nls + { $$ = $2 ; } + | l_brace r_brace opt_semi opt_nls + { $$ = NULL; } + ; + +statements + : statement + { $$ = $1; } + | statements statement + { + if ($1 == NULL || $1->type != Node_statement_list) + $1 = node($1, Node_statement_list,(NODE *)NULL); + $$ = append_right($1, + node( $2, Node_statement_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | statements error + { $$ = NULL; } + ; + +statement_term + : nls + | semi opt_nls + ; + +statement + : semi opt_nls + { $$ = NULL; } + | l_brace r_brace + { $$ = NULL; } + | l_brace statements r_brace + { $$ = $2; } + | if_statement + { $$ = $1; } + | LEX_WHILE '(' exp r_paren opt_nls statement + { $$ = node ($3, Node_K_while, $6); } + | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren 
opt_nls + { $$ = node ($6, Node_K_do, $3); } + | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement + { + $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3,1), + (NODE *)NULL, variable($5,1))); + } + | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement + { + $$ = node($10, Node_K_for, (NODE *)make_for_loop($3, $5, $7)); + } + | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement + { + $$ = node ($9, Node_K_for, + (NODE *)make_for_loop($3, (NODE *)NULL, $6)); + } + | LEX_BREAK statement_term + /* for break, maybe we'll have to remember where to break to */ + { $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); } + | LEX_CONTINUE statement_term + /* similarly */ + { $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); } + | print '(' expression_list r_paren output_redir statement_term + { $$ = node ($3, $1, $5); } + | print opt_rexpression_list output_redir statement_term + { + if ($1 == Node_K_print && $2 == NULL) + $2 = node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL); + + $$ = node ($2, $1, $3); + } + | LEX_NEXT opt_exp statement_term + { NODETYPE type; + + if ($2 && $2 == lookup("file")) { + if (do_lint) + warning("`next file' is a gawk extension"); + else if (do_unix || do_posix) + yyerror("`next file' is a gawk extension"); + else if (! io_allowed) + yyerror("`next file' used in BEGIN or END action"); + type = Node_K_nextfile; + } else { + if (! io_allowed) + yyerror("next used in BEGIN or END action"); + type = Node_K_next; + } + $$ = node ((NODE *)NULL, type, (NODE *)NULL); + } + | LEX_EXIT opt_exp statement_term + { $$ = node ($2, Node_K_exit, (NODE *)NULL); } + | LEX_RETURN + { if (! 
can_return) yyerror("return used outside function context"); } + opt_exp statement_term + { $$ = node ($3, Node_K_return, (NODE *)NULL); } + | LEX_DELETE NAME '[' expression_list ']' statement_term + { $$ = node (variable($2,1), Node_K_delete, $4); } + | exp statement_term + { $$ = $1; } + ; + +print + : LEX_PRINT + { $$ = $1; } + | LEX_PRINTF + { $$ = $1; } + ; + +if_statement + : LEX_IF '(' exp r_paren opt_nls statement + { + $$ = node($3, Node_K_if, + node($6, Node_if_branches, (NODE *)NULL)); + } + | LEX_IF '(' exp r_paren opt_nls statement + LEX_ELSE opt_nls statement + { $$ = node ($3, Node_K_if, + node ($6, Node_if_branches, $9)); } + ; + +nls + : NEWLINE + { want_assign = 0; } + | nls NEWLINE + ; + +opt_nls + : /* empty */ + | nls + ; + +input_redir + : /* empty */ + { $$ = NULL; } + | '<' simp_exp + { $$ = node ($2, Node_redirect_input, (NODE *)NULL); } + ; + +output_redir + : /* empty */ + { $$ = NULL; } + | '>' exp + { $$ = node ($2, Node_redirect_output, (NODE *)NULL); } + | APPEND_OP exp + { $$ = node ($2, Node_redirect_append, (NODE *)NULL); } + | '|' exp + { $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); } + ; + +opt_param_list + : /* empty */ + { $$ = NULL; } + | param_list + { $$ = $1; } + ; + +param_list + : NAME + { $$ = make_param($1); } + | param_list comma NAME + { $$ = append_right($1, make_param($3)); yyerrok; } + | error + { $$ = NULL; } + | param_list error + { $$ = NULL; } + | param_list comma error + { $$ = NULL; } + ; + +/* optional expression, as in for loop */ +opt_exp + : /* empty */ + { $$ = NULL; } + | exp + { $$ = $1; } + ; + +opt_rexpression_list + : /* empty */ + { $$ = NULL; } + | rexpression_list + { $$ = $1; } + ; + +rexpression_list + : rexp + { $$ = node ($1, Node_expression_list, (NODE *)NULL); } + | rexpression_list comma rexp + { + $$ = append_right($1, + node( $3, Node_expression_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | rexpression_list error + { $$ = NULL; } + | rexpression_list error rexp 
+		{ $$ = NULL; }
+	| rexpression_list comma error
+		{ $$ = NULL; }
+	;
+
+opt_expression_list
+	: /* empty */
+		{ $$ = NULL; }
+	| expression_list
+		{ $$ = $1; }
+	;
+
+expression_list
+	: exp
+		{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+	| expression_list comma exp
+		{
+			$$ = append_right($1,
+				node( $3, Node_expression_list, (NODE *)NULL));
+			yyerrok;
+		}
+	| error
+		{ $$ = NULL; }
+	| expression_list error
+		{ $$ = NULL; }
+	| expression_list error exp
+		{ $$ = NULL; }
+	| expression_list comma error
+		{ $$ = NULL; }
+	;
+
+/* Expressions, not including the comma operator. */
+exp	: variable ASSIGNOP
+		{ want_assign = 0; }
+	  exp
+		{
+		  /*
+		   * The mid-rule action above is component $3, so $4 is
+		   * the expression on the RIGHT of the assignment operator.
+		   */
+		  if (do_lint && $4->type == Node_regex)
+			warning("Regular expression on right of assignment.");
+		  $$ = node ($1, $2, $4);
+		}
+	| '(' expression_list r_paren LEX_IN NAME
+		{ $$ = node (variable($5,1), Node_in_array, $2); }
+	| exp '|' LEX_GETLINE opt_variable
+		{
+		  $$ = node ($4, Node_K_getline,
+			 node ($1, Node_redirect_pipein, (NODE *)NULL));
+		}
+	| LEX_GETLINE opt_variable input_redir
+		{
+		  if (do_lint && ! io_allowed && $3 == NULL)
+			warning("non-redirected getline undefined inside BEGIN or END action");
+		  $$ = node ($2, Node_K_getline, $3);
+		}
+	| exp LEX_AND exp
+		{ $$ = node ($1, Node_and, $3); }
+	| exp LEX_OR exp
+		{ $$ = node ($1, Node_or, $3); }
+	| exp MATCHOP exp
+		{
+		  /* here the operand being tested really is the left one */
+		  if ($1->type == Node_regex)
+			warning("Regular expression on left of MATCH operator.");
+		  $$ = node ($1, $2, mk_rexp($3));
+		}
+	| regexp
+		{ $$ = $1; }
+	| '!' regexp %prec UNARY
+		{
+		  /* a bare !/re/ is matched against $0 */
+		  $$ = node(node(make_number(0.0),
+				 Node_field_spec,
+				 (NODE *) NULL),
+			    Node_nomatch,
+			    $2);
+		}
+	| exp LEX_IN NAME
+		{ $$ = node (variable($3,1), Node_in_array, $1); }
+	| exp RELOP exp
+		{
+		  /* $3 is the operand on the RIGHT of the relational operator */
+		  if (do_lint && $3->type == Node_regex)
+			warning("Regular expression on right of comparison.");
+		  $$ = node ($1, $2, $3);
+		}
+	| exp '<' exp
+		{ $$ = node ($1, Node_less, $3); }
+	| exp '>' exp
+		{ $$ = node ($1, Node_greater, $3); }
+	| exp '?'
exp ':' exp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | exp simp_exp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + ; + +rexp + : variable ASSIGNOP + { want_assign = 0; } + rexp + { $$ = node ($1, $2, $4); } + | rexp LEX_AND rexp + { $$ = node ($1, Node_and, $3); } + | rexp LEX_OR rexp + { $$ = node ($1, Node_or, $3); } + | LEX_GETLINE opt_variable input_redir + { + if (do_lint && ! io_allowed && $3 == NULL) + warning("non-redirected getline undefined inside BEGIN or END action"); + $$ = node ($2, Node_K_getline, $3); + } + | regexp + { $$ = $1; } + | '!' regexp %prec UNARY + { $$ = node((NODE *) NULL, Node_nomatch, $2); } + | rexp MATCHOP rexp + { $$ = node ($1, $2, mk_rexp($3)); } + | rexp LEX_IN NAME + { $$ = node (variable($3,1), Node_in_array, $1); } + | rexp RELOP rexp + { $$ = node ($1, $2, $3); } + | rexp '?' rexp ':' rexp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | rexp simp_exp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + ; + +simp_exp + : non_post_simp_exp + /* Binary operators in order of decreasing precedence. */ + | simp_exp '^' simp_exp + { $$ = node ($1, Node_exp, $3); } + | simp_exp '*' simp_exp + { $$ = node ($1, Node_times, $3); } + | simp_exp '/' simp_exp + { $$ = node ($1, Node_quotient, $3); } + | simp_exp '%' simp_exp + { $$ = node ($1, Node_mod, $3); } + | simp_exp '+' simp_exp + { $$ = node ($1, Node_plus, $3); } + | simp_exp '-' simp_exp + { $$ = node ($1, Node_minus, $3); } + | variable INCREMENT + { $$ = node ($1, Node_postincrement, (NODE *)NULL); } + | variable DECREMENT + { $$ = node ($1, Node_postdecrement, (NODE *)NULL); } + ; + +non_post_simp_exp + : '!' 
simp_exp %prec UNARY + { $$ = node ($2, Node_not,(NODE *) NULL); } + | '(' exp r_paren + { $$ = $2; } + | LEX_BUILTIN + '(' opt_expression_list r_paren + { $$ = snode ($3, Node_builtin, (int) $1); } + | LEX_LENGTH '(' opt_expression_list r_paren + { $$ = snode ($3, Node_builtin, (int) $1); } + | LEX_LENGTH + { + if (do_lint) + warning("call of `length' without parentheses is not portable"); + $$ = snode ((NODE *)NULL, Node_builtin, (int) $1); + if (do_posix) + warning( "call of `length' without parentheses is deprecated by POSIX"); + } + | FUNC_CALL '(' opt_expression_list r_paren + { + $$ = node ($3, Node_func_call, make_string($1, strlen($1))); + } + | variable + | INCREMENT variable + { $$ = node ($2, Node_preincrement, (NODE *)NULL); } + | DECREMENT variable + { $$ = node ($2, Node_predecrement, (NODE *)NULL); } + | YNUMBER + { $$ = $1; } + | YSTRING + { $$ = $1; } + + | '-' simp_exp %prec UNARY + { if ($2->type == Node_val) { + $2->numbr = -(force_number($2)); + $$ = $2; + } else + $$ = node ($2, Node_unary_minus, (NODE *)NULL); + } + | '+' simp_exp %prec UNARY + { $$ = $2; } + ; + +opt_variable + : /* empty */ + { $$ = NULL; } + | variable + { $$ = $1; } + ; + +variable + : NAME + { $$ = variable($1,1); } + | NAME '[' expression_list ']' + { + if ($3->rnode == NULL) { + $$ = node (variable($1,1), Node_subscript, $3->lnode); + freenode($3); + } else + $$ = node (variable($1,1), Node_subscript, $3); + } + | '$' non_post_simp_exp + { $$ = node ($2, Node_field_spec, (NODE *)NULL); } + ; + +l_brace + : '{' opt_nls + ; + +r_brace + : '}' opt_nls { yyerrok; } + ; + +r_paren + : ')' { yyerrok; } + ; + +opt_semi + : /* empty */ + | semi + ; + +semi + : ';' { yyerrok; want_assign = 0; } + ; + +comma : ',' opt_nls { yyerrok; } + ; + +%% + +struct token { + char *operator; /* text to match */ + NODETYPE value; /* node type */ + int class; /* lexical class */ + unsigned flags; /* # of args. 
allowed and compatibility */ +# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */ +# define A(n) (1<<(n)) +# define VERSION 0xFF00 /* old awk is zero */ +# define NOT_OLD 0x0100 /* feature not in old awk */ +# define NOT_POSIX 0x0200 /* feature not in POSIX */ +# define GAWKX 0x0400 /* gawk extension */ + NODE *(*ptr) (); /* function that implements this keyword */ +}; + +extern NODE + *do_exp(), *do_getline(), *do_index(), *do_length(), + *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(), + *do_split(), *do_system(), *do_int(), *do_close(), + *do_atan2(), *do_sin(), *do_cos(), *do_rand(), + *do_srand(), *do_match(), *do_tolower(), *do_toupper(), + *do_sub(), *do_gsub(), *do_strftime(), *do_systime(); + +/* Tokentab is sorted in ASCII ascending order, so it can be binary searched. */ + +static struct token tokentab[] = { +{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, +{"END", Node_illegal, LEX_END, 0, 0}, +{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, +{"break", Node_K_break, LEX_BREAK, 0, 0}, +{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close}, +{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, +{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, +{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, +{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, +{"else", Node_illegal, LEX_ELSE, 0, 0}, +{"exit", Node_K_exit, LEX_EXIT, 0, 0}, +{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, +{"for", Node_K_for, LEX_FOR, 0, 0}, +{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, +{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, +{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, +{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"if", Node_K_if, LEX_IF, 0, 0}, +{"in", Node_illegal, LEX_IN, 0, 0}, +{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, +{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, +{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, +{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
+{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match}, +{"next", Node_K_next, LEX_NEXT, 0, 0}, +{"print", Node_K_print, LEX_PRINT, 0, 0}, +{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, +{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, +{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, +{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, +{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, +{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, +{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, +{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, +{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime}, +{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, +{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, +{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, +{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, +{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, +{"while", Node_K_while, LEX_WHILE, 0, 0}, +}; + +/* VARARGS0 */ +static void +yyerror(va_alist) +va_dcl +{ + va_list args; + char *mesg = NULL; + register char *bp, *cp; + char *scan; + char buf[120]; + + errcount++; + /* Find the current line in the input file */ + if (lexptr) { + if (!thisline) { + cp = lexeme; + if (*cp == '\n') { + cp--; + mesg = "unexpected newline"; + } + for ( ; cp != lexptr_begin && *cp != '\n'; --cp) + ; + if (*cp == '\n') + cp++; + thisline = cp; + } + /* NL isn't guaranteed */ + bp = lexeme; + while (bp < lexend && *bp && *bp != '\n') + bp++; + } else { + thisline = "(END OF FILE)"; + bp = thisline + 13; + } + msg("%.*s", (int) (bp - thisline), thisline); + bp = buf; + cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. 
input */ + if (lexptr) { + scan = thisline; + while (bp < cp && scan < lexeme) + if (*scan++ == '\t') + *bp++ = '\t'; + else + *bp++ = ' '; + *bp++ = '^'; + *bp++ = ' '; + } + va_start(args); + if (mesg == NULL) + mesg = va_arg(args, char *); + strcpy(bp, mesg); + err("", buf, args); + va_end(args); + exit(2); +} + +static char * +get_src_buf() +{ + static int samefile = 0; + static int nextfile = 0; + static char *buf = NULL; + static int fd; + int n; + register char *scan; + static int len = 0; + static int did_newline = 0; +# define SLOP 128 /* enough space to hold most source lines */ + + if (nextfile > numfiles) + return NULL; + + if (srcfiles[nextfile].stype == CMDLINE) { + if (len == 0) { + len = strlen(srcfiles[nextfile].val); + sourceline = 1; + lexptr = lexptr_begin = srcfiles[nextfile].val; + lexend = lexptr + len; + } else if (!did_newline && *(lexptr-1) != '\n') { + /* + * The following goop is to ensure that the source + * ends with a newline and that the entire current + * line is available for error messages. 
+ */ + int offset; + + did_newline = 1; + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + len = lexptr - scan; + emalloc(buf, char *, len+1, "get_src_buf"); + memcpy(buf, scan, len); + thisline = buf; + lexptr = buf + len; + *lexptr = '\n'; + lexeme = lexptr - offset; + lexptr_begin = buf; + lexend = lexptr + 1; + } else { + len = 0; + lexeme = lexptr = lexptr_begin = NULL; + } + if (lexptr == NULL && ++nextfile <= numfiles) + return get_src_buf(); + return lexptr; + } + if (!samefile) { + source = srcfiles[nextfile].val; + if (source == NULL) { + if (buf) { + free(buf); + buf = NULL; + } + len = 0; + return lexeme = lexptr = lexptr_begin = NULL; + } + fd = pathopen(source); + if (fd == -1) + fatal("can't open source file \"%s\" for reading (%s)", + source, strerror(errno)); + len = optimal_bufsize(fd); + if (buf) + free(buf); + emalloc(buf, char *, len + SLOP, "get_src_buf"); + lexptr_begin = buf + SLOP; + samefile = 1; + sourceline = 1; + } else { + /* + * Here, we retain the current source line (up to length SLOP) + * in the beginning of the buffer that was overallocated above + */ + int offset; + int linelen; + + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + linelen = lexptr - scan; + if (linelen > SLOP) + linelen = SLOP; + thisline = buf + SLOP - linelen; + memcpy(thisline, scan, linelen); + lexeme = buf + SLOP - offset; + lexptr_begin = thisline; + } + n = read(fd, buf + SLOP, len); + if (n == -1) + fatal("can't read sourcefile \"%s\" (%s)", + source, strerror(errno)); + if (n == 0) { + samefile = 0; + nextfile++; + len = 0; + return get_src_buf(); + } + lexptr = buf + SLOP; + lexend = lexptr + n; + return buf; +} + +#define tokadd(x) (*token++ = (x), token == tokend ? 
tokexpand() : token) + +char * +tokexpand() +{ + static int toksize = 60; + int tokoffset; + + tokoffset = token - tokstart; + toksize *= 2; + if (tokstart) + erealloc(tokstart, char *, toksize, "tokexpand"); + else + emalloc(tokstart, char *, toksize, "tokexpand"); + tokend = tokstart + toksize; + token = tokstart + tokoffset; + return token; +} + +#if DEBUG +char +nextc() { + if (lexptr && lexptr < lexend) + return *lexptr++; + else if (get_src_buf()) + return *lexptr++; + else + return '\0'; +} +#else +#define nextc() ((lexptr && lexptr < lexend) ? \ + *lexptr++ : \ + (get_src_buf() ? *lexptr++ : '\0') \ + ) +#endif +#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) + +/* + * Read the input and turn it into tokens. + */ + +static int +yylex() +{ + register int c; + int seen_e = 0; /* These are for numbers */ + int seen_point = 0; + int esc_seen; /* for literal strings */ + int low, mid, high; + static int did_newline = 0; + char *tokkey; + + if (!nextc()) + return 0; + pushback(); + lexeme = lexptr; + thisline = NULL; + if (want_regexp) { + int in_brack = 0; + + want_regexp = 0; + token = tokstart; + while ((c = nextc()) != 0) { + switch (c) { + case '[': + in_brack = 1; + break; + case ']': + in_brack = 0; + break; + case '\\': + if ((c = nextc()) == '\0') { + yyerror("unterminated regexp ends with \\ at end of file"); + } else if (c == '\n') { + sourceline++; + continue; + } else + tokadd('\\'); + break; + case '/': /* end of the regexp */ + if (in_brack) + break; + + pushback(); + tokadd('\0'); + yylval.sval = tokstart; + return REGEXP; + case '\n': + pushback(); + yyerror("unterminated regexp"); + case '\0': + yyerror("unterminated regexp at end of file"); + } + tokadd(c); + } + } +retry: + while ((c = nextc()) == ' ' || c == '\t') + ; + + lexeme = lexptr ? 
lexptr - 1 : lexptr; + thisline = NULL; + token = tokstart; + yylval.nodetypeval = Node_illegal; + + switch (c) { + case 0: + return 0; + + case '\n': + sourceline++; + return NEWLINE; + + case '#': /* it's a comment */ + while ((c = nextc()) != '\n') { + if (c == '\0') + return 0; + } + sourceline++; + return NEWLINE; + + case '\\': +#ifdef RELAXED_CONTINUATION + if (!do_unix) { /* strip trailing white-space and/or comment */ + while ((c = nextc()) == ' ' || c == '\t') continue; + if (c == '#') + while ((c = nextc()) != '\n') if (!c) break; + pushback(); + } +#endif /*RELAXED_CONTINUATION*/ + if (nextc() == '\n') { + sourceline++; + goto retry; + } else + yyerror("inappropriate use of backslash"); + break; + + case '$': + want_assign = 1; + return '$'; + + case ')': + case ']': + case '(': + case '[': + case ';': + case ':': + case '?': + case '{': + case ',': + return c; + + case '*': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_times; + return ASSIGNOP; + } else if (do_posix) { + pushback(); + return '*'; + } else if (c == '*') { + /* make ** and **= aliases for ^ and ^= */ + static int did_warn_op = 0, did_warn_assgn = 0; + + if (nextc() == '=') { + if (do_lint && ! did_warn_assgn) { + did_warn_assgn = 1; + warning("**= is not allowed by POSIX"); + } + yylval.nodetypeval = Node_assign_exp; + return ASSIGNOP; + } else { + pushback(); + if (do_lint && ! did_warn_op) { + did_warn_op = 1; + warning("** is not allowed by POSIX"); + } + return '^'; + } + } + pushback(); + return '*'; + + case '/': + if (want_assign) { + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_quotient; + return ASSIGNOP; + } + pushback(); + } + return '/'; + + case '%': + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_mod; + return ASSIGNOP; + } + pushback(); + return '%'; + + case '^': + { + static int did_warn_op = 0, did_warn_assgn = 0; + + if (nextc() == '=') { + + if (do_lint && ! 
did_warn_assgn) { + did_warn_assgn = 1; + warning("operator `^=' is not supported in old awk"); + } + yylval.nodetypeval = Node_assign_exp; + return ASSIGNOP; + } + pushback(); + if (do_lint && ! did_warn_op) { + did_warn_op = 1; + warning("operator `^' is not supported in old awk"); + } + return '^'; + } + + case '+': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_plus; + return ASSIGNOP; + } + if (c == '+') + return INCREMENT; + pushback(); + return '+'; + + case '!': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_notequal; + return RELOP; + } + if (c == '~') { + yylval.nodetypeval = Node_nomatch; + want_assign = 0; + return MATCHOP; + } + pushback(); + return '!'; + + case '<': + if (nextc() == '=') { + yylval.nodetypeval = Node_leq; + return RELOP; + } + yylval.nodetypeval = Node_less; + pushback(); + return '<'; + + case '=': + if (nextc() == '=') { + yylval.nodetypeval = Node_equal; + return RELOP; + } + yylval.nodetypeval = Node_assign; + pushback(); + return ASSIGNOP; + + case '>': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_geq; + return RELOP; + } else if (c == '>') { + yylval.nodetypeval = Node_redirect_append; + return APPEND_OP; + } + yylval.nodetypeval = Node_greater; + pushback(); + return '>'; + + case '~': + yylval.nodetypeval = Node_match; + want_assign = 0; + return MATCHOP; + + case '}': + /* + * Added did newline stuff. Easier than + * hacking the grammar + */ + if (did_newline) { + did_newline = 0; + return c; + } + did_newline++; + --lexptr; /* pick up } next time */ + return NEWLINE; + + case '"': + esc_seen = 0; + while ((c = nextc()) != '"') { + if (c == '\n') { + pushback(); + yyerror("unterminated string"); + } + if (c == '\\') { + c = nextc(); + if (c == '\n') { + sourceline++; + continue; + } + esc_seen = 1; + tokadd('\\'); + } + if (c == '\0') { + pushback(); + yyerror("unterminated string"); + } + tokadd(c); + } + yylval.nodeval = make_str_node(tokstart, + token - tokstart, esc_seen ? 
SCAN : 0); + yylval.nodeval->flags |= PERM; + return YSTRING; + + case '-': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_minus; + return ASSIGNOP; + } + if (c == '-') + return DECREMENT; + pushback(); + return '-'; + + case '.': + c = nextc(); + pushback(); + if (!isdigit(c)) + return '.'; + else + c = '.'; /* FALL THROUGH */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* It's a number */ + for (;;) { + int gotnumber = 0; + + tokadd(c); + switch (c) { + case '.': + if (seen_point) { + gotnumber++; + break; + } + ++seen_point; + break; + case 'e': + case 'E': + if (seen_e) { + gotnumber++; + break; + } + ++seen_e; + if ((c = nextc()) == '-' || c == '+') + tokadd(c); + else + pushback(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + break; + default: + gotnumber++; + } + if (gotnumber) + break; + c = nextc(); + } + pushback(); + yylval.nodeval = make_number(atof(tokstart)); + yylval.nodeval->flags |= PERM; + return YNUMBER; + + case '&': + if ((c = nextc()) == '&') { + yylval.nodetypeval = Node_and; + for (;;) { + c = nextc(); + if (c == '\0') + break; + if (c == '#') { + while ((c = nextc()) != '\n' && c != '\0') + ; + if (c == '\0') + break; + } + if (c == '\n') + sourceline++; + if (! isspace(c)) { + pushback(); + break; + } + } + want_assign = 0; + return LEX_AND; + } + pushback(); + return '&'; + + case '|': + if ((c = nextc()) == '|') { + yylval.nodetypeval = Node_or; + for (;;) { + c = nextc(); + if (c == '\0') + break; + if (c == '#') { + while ((c = nextc()) != '\n' && c != '\0') + ; + if (c == '\0') + break; + } + if (c == '\n') + sourceline++; + if (! isspace(c)) { + pushback(); + break; + } + } + want_assign = 0; + return LEX_OR; + } + pushback(); + return '|'; + } + + if (c != '_' && ! 
isalpha(c)) + yyerror("Invalid char '%c' in expression\n", c); + + /* it's some type of name-type-thing. Find its length */ + token = tokstart; + while (is_identchar(c)) { + tokadd(c); + c = nextc(); + } + tokadd('\0'); + emalloc(tokkey, char *, token - tokstart, "yylex"); + memcpy(tokkey, tokstart, token - tokstart); + pushback(); + + /* See if it is a special token: binary search of tokentab on the token text. */ + low = 0; + high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1; + while (low <= high) { + int i/* , c */; + + mid = (low + high) / 2; + c = *tokstart - tokentab[mid].operator[0]; + i = c ? c : strcmp (tokstart, tokentab[mid].operator); + + if (i < 0) { /* token < mid */ + high = mid - 1; + } else if (i > 0) { /* token > mid */ + low = mid + 1; + } else { + if (do_lint) { + if (tokentab[mid].flags & GAWKX) + warning("%s() is a gawk extension", + tokentab[mid].operator); + if (tokentab[mid].flags & NOT_POSIX) + warning("POSIX does not allow %s", + tokentab[mid].operator); + if (tokentab[mid].flags & NOT_OLD) + warning("%s is not supported in old awk", + tokentab[mid].operator); + } + /* a keyword disabled by the current mode falls out of the loop and is returned as an ordinary name below */ + if ((do_unix && (tokentab[mid].flags & GAWKX)) + || (do_posix && (tokentab[mid].flags & NOT_POSIX))) + break; + if (tokentab[mid].class == LEX_BUILTIN + || tokentab[mid].class == LEX_LENGTH + ) + yylval.lval = mid; + else + yylval.nodetypeval = tokentab[mid].value; + + /* NOTE(review): tokkey allocated above is not released on this return path -- confirm whether that is intended */ + return tokentab[mid].class; + } + } + + yylval.sval = tokkey; + if (*lexptr == '(') + return FUNC_CALL; + else { + want_assign = 1; + return NAME; + } +} + +/* + * Allocate a node of type op, set its flags to MALLOC and record the + * current source file and line in it. + */ +static NODE * +node_common(op) +NODETYPE op; +{ + register NODE *r; + + getnode(r); + r->type = op; + r->flags = MALLOC; + /* if lookahead is NL, lineno is 1 too high */ + if (lexeme && *lexeme == '\n') + r->source_line = sourceline - 1; + else + r->source_line = sourceline; + r->source_file = source; + return r; +} + +/* + * This allocates a node with defined lnode and rnode.
+ */ +NODE * +node(left, op, right) +NODE *left, *right; +NODETYPE op; +{ + register NODE *r; + + r = node_common(op); + r->lnode = left; + r->rnode = right; + return r; +} + +/* + * This allocates a node with defined subnode and proc for builtin functions + * Checks for arg. count and supplies defaults where possible. + */ +static NODE * +snode(subn, op, idx) +NODETYPE op; +int idx; +NODE *subn; +{ + register NODE *r; + register NODE *n; + int nexp = 0; + int args_allowed; + + r = node_common(op); + + /* traverse expression list to see how many args. given */ + for (n= subn; n; n= n->rnode) { + nexp++; + if (nexp > 3) + break; + } + + /* check against how many args. are allowed for this builtin */ + args_allowed = tokentab[idx].flags & ARGS; + if (args_allowed && !(args_allowed & A(nexp))) + fatal("%s() cannot have %d argument%c", + tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's'); + + r->proc = tokentab[idx].ptr; + + /* special case processing for a few builtins */ + if (nexp == 0 && r->proc == do_length) { + subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL), + Node_expression_list, + (NODE *) NULL); + } else if (r->proc == do_match) { + if (subn->rnode->lnode->type != Node_regex) + subn->rnode->lnode = mk_rexp(subn->rnode->lnode); + } else if (r->proc == do_sub || r->proc == do_gsub) { + if (subn->lnode->type != Node_regex) + subn->lnode = mk_rexp(subn->lnode); + if (nexp == 2) + append_right(subn, node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL)); + else if (do_lint && subn->rnode->rnode->lnode->type == Node_val) + warning("string literal as last arg of substitute"); + } else if (r->proc == do_split) { + if (nexp == 2) + append_right(subn, + node(FS_node, Node_expression_list, (NODE *) NULL)); + n = subn->rnode->rnode->lnode; + if (n->type != Node_regex) + subn->rnode->rnode->lnode = mk_rexp(n); + if (nexp == 2) + subn->rnode->rnode->lnode->re_flags |= FS_DFLT; + } + + r->subnode = subn; 
+ return r; +} + +/* + * This allocates a Node_line_range node with defined condpair and + * zeroes the trigger word to avoid the temptation of assuming that calling + * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. + */ +/* Otherwise like node() */ +static NODE * +mkrangenode(cpair) +NODE *cpair; +{ + register NODE *r; + + getnode(r); + r->type = Node_line_range; + r->condpair = cpair; + r->triggered = 0; + return r; +} + +/* Build a for loop */ +static NODE * +make_for_loop(init, cond, incr) +NODE *init, *cond, *incr; +{ + register FOR_LOOP_HEADER *r; + NODE *n; + + emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); + getnode(n); + n->type = Node_illegal; + r->init = init; + r->cond = cond; + r->incr = incr; + n->sub.nodep.r.hd = r; + return n; +} + +/* + * Install a name in the symbol table, even if it is already there. + * Caller must check against redefinition if that is desired. + */ +NODE * +install(name, value) +char *name; +NODE *value; +{ + register NODE *hp; + register int len, bucket; + + len = strlen(name); + bucket = hash(name, len); + getnode(hp); + hp->type = Node_hashnode; + hp->hnext = variables[bucket]; + variables[bucket] = hp; + hp->hlength = len; + hp->hvalue = value; + hp->hname = name; + hp->hvalue->vname = name; + return hp->hvalue; +} + +/* find the most recent hash node for name installed by install */ +NODE * +lookup(name) +char *name; +{ + register NODE *bucket; + register int len; + + len = strlen(name); + bucket = variables[hash(name, len)]; + while (bucket) { + if (bucket->hlength == len && STREQN(bucket->hname, name, len)) + return bucket->hvalue; + bucket = bucket->hnext; + } + return NULL; +} + +/* + * Add new to the rightmost branch of LIST. This uses n^2 time, so we make + * a simple attempt at optimizing it. 
+ */ +static NODE * +append_right(list, new) +NODE *list, *new; +{ + register NODE *oldlist; + static NODE *savefront = NULL, *savetail = NULL; + + oldlist = list; + if (savefront == oldlist) { + savetail = savetail->rnode = new; + return oldlist; + } else + savefront = oldlist; + while (list->rnode != NULL) + list = list->rnode; + savetail = list->rnode = new; + return oldlist; +} + +/* + * check if name is already installed; if so, it had better have Null value, + * in which case def is added as the value. Otherwise, install name with def + * as value. + */ +static void +func_install(params, def) +NODE *params; +NODE *def; +{ + NODE *r; + + pop_params(params->rnode); + pop_var(params, 0); + r = lookup(params->param); + if (r != NULL) { + fatal("function name `%s' previously defined", params->param); + } else + (void) install(params->param, node(params, Node_func, def)); +} + +static void +pop_var(np, freeit) +NODE *np; +int freeit; +{ + register NODE *bucket, **save; + register int len; + char *name; + + name = np->param; + len = strlen(name); + save = &(variables[hash(name, len)]); + for (bucket = *save; bucket; bucket = bucket->hnext) { + if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { + *save = bucket->hnext; + freenode(bucket); + if (freeit) + free(np->param); + return; + } + save = &(bucket->hnext); + } +} + +static void +pop_params(params) +NODE *params; +{ + register NODE *np; + + for (np = params; np != NULL; np = np->rnode) + pop_var(np, 1); +} + +static NODE * +make_param(name) +char *name; +{ + NODE *r; + + getnode(r); + r->type = Node_param_list; + r->rnode = NULL; + r->param = name; + r->param_cnt = param_counter++; + return (install(name, r)); +} + +/* Name points to a variable name. 
Make sure it is in the symbol table: an unknown name is installed as a Node_var holding Nnull_string; for a known name, name is freed when can_free is set; the first request for ENVIRON calls load_environ() beforehand */ +NODE * +variable(name, can_free) +char *name; +int can_free; +{ + register NODE *r; + static int env_loaded = 0; + + if (!env_loaded && STREQ(name, "ENVIRON")) { + load_environ(); + env_loaded = 1; + } + if ((r = lookup(name)) == NULL) + r = install(name, node(Nnull_string, Node_var, (NODE *) NULL)); + else if (can_free) + free(name); + return r; +} + +/* + * Return exp itself when it is already a Node_regex; otherwise return a + * new Node_regex node whose re_exp is exp. + */ +static NODE * +mk_rexp(exp) +NODE *exp; +{ + if (exp->type == Node_regex) + return exp; + else { + NODE *n; + + getnode(n); + n->type = Node_regex; + n->re_exp = exp; + n->re_text = NULL; + n->re_reg = NULL; + n->re_flags = 0; + n->re_cnt = 1; + return n; + } +} diff --git a/gnu/usr.bin/awk/builtin.c b/gnu/usr.bin/awk/builtin.c new file mode 100644 index 0000000..9d5e3b3 --- /dev/null +++ b/gnu/usr.bin/awk/builtin.c @@ -0,0 +1,1133 @@ +/* + * builtin.c - Builtin functions and various utility procedures + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + + +#include "awk.h" + + +#ifndef SRANDOM_PROTO +extern void srandom P((int seed)); +#endif +#ifndef linux +extern char *initstate P((unsigned seed, char *state, int n)); +extern char *setstate P((char *state)); +extern long random P((void)); +#endif + +extern NODE **fields_arr; +extern int output_is_tty; + +static NODE *sub_common P((NODE *tree, int global)); + +#ifdef GFMT_WORKAROUND +char *gfmt P((double g, int prec, char *buf)); +#endif + +#ifdef _CRAY +/* Work around a problem in conversion of doubles to exact integers. */ +#include <float.h> +#define Floor(n) floor((n) * (1.0 + DBL_EPSILON)) +#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON)) + +/* Force the standard C compiler to use the library math functions. */ +extern double exp(double); +double (*Exp)() = exp; +#define exp(x) (*Exp)(x) +extern double log(double); +double (*Log)() = log; +#define log(x) (*Log)(x) +#else +#define Floor(n) floor(n) +#define Ceil(n) ceil(n) +#endif + +static void +efwrite(ptr, size, count, fp, from, rp, flush) +void *ptr; +unsigned size, count; +FILE *fp; +char *from; +struct redirect *rp; +int flush; +{ + errno = 0; + if (fwrite(ptr, size, count, fp) != count) + goto wrerror; + if (flush + && ((fp == stdout && output_is_tty) + || (rp && (rp->flag & RED_NOBUF)))) { + fflush(fp); + if (ferror(fp)) + goto wrerror; + } + return; + + wrerror: + fatal("%s to \"%s\" failed (%s)", from, + rp ? rp->value : "standard output", + errno ? 
strerror(errno) : "reason unknown"); +} + +/* Builtin functions */ +NODE * +do_exp(tree) +NODE *tree; +{ + NODE *tmp; + double d, res; +#ifndef exp + double exp P((double)); +#endif + + tmp= tree_eval(tree->lnode); + d = force_number(tmp); + free_temp(tmp); + errno = 0; + res = exp(d); + if (errno == ERANGE) + warning("exp argument %g is out of range", d); + return tmp_number((AWKNUM) res); +} + +NODE * +do_index(tree) +NODE *tree; +{ + NODE *s1, *s2; + register char *p1, *p2; + register int l1, l2; + long ret; + + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + force_string(s1); + force_string(s2); + p1 = s1->stptr; + p2 = s2->stptr; + l1 = s1->stlen; + l2 = s2->stlen; + ret = 0; + if (IGNORECASE) { + while (l1) { + if (l2 > l1) + break; + if (casetable[(int)*p1] == casetable[(int)*p2] + && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; + } + } else { + while (l1) { + if (l2 > l1) + break; + if (*p1 == *p2 + && (l2 == 1 || STREQN(p1, p2, l2))) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; + } + } + free_temp(s1); + free_temp(s2); + return tmp_number((AWKNUM) ret); +} + +NODE * +do_int(tree) +NODE *tree; +{ + NODE *tmp; + double floor P((double)); + double ceil P((double)); + double d; + + tmp = tree_eval(tree->lnode); + d = force_number(tmp); + if (d >= 0) + d = Floor(d); + else + d = Ceil(d); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +NODE * +do_length(tree) +NODE *tree; +{ + NODE *tmp; + int len; + + tmp = tree_eval(tree->lnode); + len = force_string(tmp)->stlen; + free_temp(tmp); + return tmp_number((AWKNUM) len); +} + +NODE * +do_log(tree) +NODE *tree; +{ + NODE *tmp; +#ifndef log + double log P((double)); +#endif + double d, arg; + + tmp = tree_eval(tree->lnode); + arg = (double) force_number(tmp); + if (arg < 0.0) + warning("log called with negative argument %g", arg); + d = log(arg); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +/* %e and %f 
formats are not properly implemented. Someone should fix them */ +/* Actually, this whole thing should be reimplemented. */ + +NODE * +do_sprintf(tree) +NODE *tree; +{ +#define bchunk(s,l) if(l) {\ + while((l)>ofre) {\ + erealloc(obuf, char *, osiz*2, "do_sprintf");\ + ofre+=osiz;\ + osiz*=2;\ + }\ + memcpy(obuf+olen,s,(l));\ + olen+=(l);\ + ofre-=(l);\ + } + + /* Is there space for something L big in the buffer? */ +#define chksize(l) if((l)>ofre) {\ + erealloc(obuf, char *, osiz*2, "do_sprintf");\ + ofre+=osiz;\ + osiz*=2;\ + } + + /* + * Get the next arg to be formatted. If we've run out of args, + * return "" (Null string) + */ +#define parse_next_arg() {\ + if(!carg) { toofew = 1; break; }\ + else {\ + arg=tree_eval(carg->lnode);\ + carg=carg->rnode;\ + }\ + } + + NODE *r; + int toofew = 0; + char *obuf; + int osiz, ofre, olen; + static char chbuf[] = "0123456789abcdef"; + static char sp[] = " "; + char *s0, *s1; + int n0; + NODE *sfmt, *arg; + register NODE *carg; + long fw, prec, lj, alt, big; + long *cur; + long val; +#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */ + long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */ +#endif + unsigned long uval; + int sgn; + int base; + char cpbuf[30]; /* if we have numbers bigger than 30 */ + char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */ + char *cp; + char *fill; + double tmpval; + char *pr_str; + int ucasehex = 0; + char signchar = 0; + int len; + + + emalloc(obuf, char *, 120, "do_sprintf"); + osiz = 120; + ofre = osiz - 1; + olen = 0; + sfmt = tree_eval(tree->lnode); + sfmt = force_string(sfmt); + carg = tree->rnode; + for (s0 = s1 = sfmt->stptr, n0 = sfmt->stlen; n0-- > 0;) { + if (*s1 != '%') { + s1++; + continue; + } + bchunk(s0, s1 - s0); + s0 = s1; + cur = &fw; + fw = 0; + prec = 0; + lj = alt = big = 0; + fill = sp; + cp = cend; + s1++; + +retry: + --n0; + switch (*s1++) { + case '%': + bchunk("%", 1); + s0 = s1; + break; + + case '0': + if (fill != sp || lj) + 
goto lose; + if (cur == &fw) + fill = "0"; /* FALL through */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (cur == 0) + goto lose; + *cur = s1[-1] - '0'; + while (n0 > 0 && *s1 >= '0' && *s1 <= '9') { + --n0; + *cur = *cur * 10 + *s1++ - '0'; + } + goto retry; + case '*': + if (cur == 0) + goto lose; + parse_next_arg(); + *cur = force_number(arg); + free_temp(arg); + goto retry; + case ' ': /* print ' ' or '-' */ + case '+': /* print '+' or '-' */ + signchar = *(s1-1); + goto retry; + case '-': + if (lj || fill != sp) + goto lose; + lj++; + goto retry; + case '.': + if (cur != &fw) + goto lose; + cur = ≺ + goto retry; + case '#': + if (alt) + goto lose; + alt++; + goto retry; + case 'l': + if (big) + goto lose; + big++; + goto retry; + case 'c': + parse_next_arg(); + if (arg->flags & NUMBER) { +#ifdef sun386 + tmp_uval = arg->numbr; + uval= (unsigned long) tmp_uval; +#else + uval = (unsigned long) arg->numbr; +#endif + cpbuf[0] = uval; + prec = 1; + pr_str = cpbuf; + goto dopr_string; + } + if (! 
prec) + prec = 1; + else if (prec > arg->stlen) + prec = arg->stlen; + pr_str = arg->stptr; + goto dopr_string; + case 's': + parse_next_arg(); + arg = force_string(arg); + if (!prec || prec > arg->stlen) + prec = arg->stlen; + pr_str = arg->stptr; + + dopr_string: + if (fw > prec && !lj) { + while (fw > prec) { + bchunk(sp, 1); + fw--; + } + } + bchunk(pr_str, (int) prec); + if (fw > prec) { + while (fw > prec) { + bchunk(sp, 1); + fw--; + } + } + s0 = s1; + free_temp(arg); + break; + case 'd': + case 'i': + parse_next_arg(); + val = (long) force_number(arg); + free_temp(arg); + if (val < 0) { + sgn = 1; + val = -val; + } else + sgn = 0; + do { + *--cp = '0' + val % 10; + val /= 10; + } while (val); + if (sgn) + *--cp = '-'; + else if (signchar) + *--cp = signchar; + if (prec > fw) + fw = prec; + prec = cend - cp; + if (fw > prec && !lj) { + if (fill != sp && (*cp == '-' || signchar)) { + bchunk(cp, 1); + cp++; + prec--; + fw--; + } + while (fw > prec) { + bchunk(fill, 1); + fw--; + } + } + bchunk(cp, (int) prec); + if (fw > prec) { + while (fw > prec) { + bchunk(fill, 1); + fw--; + } + } + s0 = s1; + break; + case 'u': + base = 10; + goto pr_unsigned; + case 'o': + base = 8; + goto pr_unsigned; + case 'X': + ucasehex = 1; + case 'x': + base = 16; + goto pr_unsigned; + pr_unsigned: + parse_next_arg(); + uval = (unsigned long) force_number(arg); + free_temp(arg); + do { + *--cp = chbuf[uval % base]; + if (ucasehex && isalpha(*cp)) + *cp = toupper(*cp); + uval /= base; + } while (uval); + if (alt && (base == 8 || base == 16)) { + if (base == 16) { + if (ucasehex) + *--cp = 'X'; + else + *--cp = 'x'; + } + *--cp = '0'; + } + prec = cend - cp; + if (fw > prec && !lj) { + while (fw > prec) { + bchunk(fill, 1); + fw--; + } + } + bchunk(cp, (int) prec); + if (fw > prec) { + while (fw > prec) { + bchunk(fill, 1); + fw--; + } + } + s0 = s1; + break; + case 'g': + parse_next_arg(); + tmpval = force_number(arg); + free_temp(arg); + chksize(fw + prec + 9); /* 9==slop */ + + 
cp = cpbuf; + *cp++ = '%'; + if (lj) + *cp++ = '-'; + if (fill != sp) + *cp++ = '0'; +#ifndef GFMT_WORKAROUND + if (cur != &fw) { + (void) strcpy(cp, "*.*g"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval); + } else { + (void) strcpy(cp, "*g"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval); + } +#else /* GFMT_WORKAROUND */ + { + char *gptr, gbuf[120]; +#define DEFAULT_G_PRECISION 6 + if (fw + prec + 9 > sizeof gbuf) { /* 9==slop */ + emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)"); + } else + gptr = gbuf; + (void) gfmt((double) tmpval, cur != &fw ? + (int) prec : DEFAULT_G_PRECISION, gptr); + *cp++ = '*', *cp++ = 's', *cp = '\0'; + (void) sprintf(obuf + olen, cpbuf, (int) fw, gptr); + if (fill != sp && *gptr == ' ') { + char *p = gptr; + do { *p++ = '0'; } while (*p == ' '); + } + if (gptr != gbuf) free(gptr); + } +#endif /* GFMT_WORKAROUND */ + len = strlen(obuf + olen); + ofre -= len; + olen += len; + s0 = s1; + break; + + case 'f': + parse_next_arg(); + tmpval = force_number(arg); + free_temp(arg); + chksize(fw + prec + 9); /* 9==slop */ + + cp = cpbuf; + *cp++ = '%'; + if (lj) + *cp++ = '-'; + if (fill != sp) + *cp++ = '0'; + if (cur != &fw) { + (void) strcpy(cp, "*.*f"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval); + } else { + (void) strcpy(cp, "*f"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval); + } + len = strlen(obuf + olen); + ofre -= len; + olen += len; + s0 = s1; + break; + case 'e': + parse_next_arg(); + tmpval = force_number(arg); + free_temp(arg); + chksize(fw + prec + 9); /* 9==slop */ + cp = cpbuf; + *cp++ = '%'; + if (lj) + *cp++ = '-'; + if (fill != sp) + *cp++ = '0'; + if (cur != &fw) { + (void) strcpy(cp, "*.*e"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval); + } else { + (void) strcpy(cp, "*e"); + (void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval); + } + len = strlen(obuf + olen); + ofre -= len; + 
olen += len; + s0 = s1; + break; + + default: + lose: + break; + } + if (toofew) + fatal("%s\n\t%s\n\t%*s%s", + "not enough arguments to satisfy format string", + sfmt->stptr, s1 - sfmt->stptr - 2, "", + "^ ran out for this one" + ); + } + if (do_lint && carg != NULL) + warning("too many arguments supplied for format string"); + bchunk(s0, s1 - s0); + free_temp(sfmt); + r = make_str_node(obuf, olen, ALREADY_MALLOCED); + r->flags |= TEMP; + return r; +} + +void +do_printf(tree) +register NODE *tree; +{ + struct redirect *rp = NULL; + register FILE *fp; + + if (tree->rnode) { + int errflg; /* not used, sigh */ + + rp = redirect(tree->rnode, &errflg); + if (rp) { + fp = rp->fp; + if (!fp) + return; + } else + return; + } else + fp = stdout; + tree = do_sprintf(tree->lnode); + efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp , 1); + free_temp(tree); +} + +NODE * +do_sqrt(tree) +NODE *tree; +{ + NODE *tmp; + double arg; + extern double sqrt P((double)); + + tmp = tree_eval(tree->lnode); + arg = (double) force_number(tmp); + free_temp(tmp); + if (arg < 0.0) + warning("sqrt called with negative argument %g", arg); + return tmp_number((AWKNUM) sqrt(arg)); +} + +NODE * +do_substr(tree) +NODE *tree; +{ + NODE *t1, *t2, *t3; + NODE *r; + register int indx; + size_t length; + + t1 = tree_eval(tree->lnode); + t2 = tree_eval(tree->rnode->lnode); + if (tree->rnode->rnode == NULL) /* third arg. 
missing */ + length = t1->stlen; + else { + t3 = tree_eval(tree->rnode->rnode->lnode); + length = (size_t) force_number(t3); + free_temp(t3); + } + indx = (int) force_number(t2) - 1; + free_temp(t2); + t1 = force_string(t1); + if (indx < 0) + indx = 0; + if (indx >= t1->stlen || length <= 0) { + free_temp(t1); + return Nnull_string; + } + if (indx + length > t1->stlen || LONG_MAX - indx < length) + length = t1->stlen - indx; + r = tmp_string(t1->stptr + indx, length); + free_temp(t1); + return r; +} + +NODE * +do_strftime(tree) +NODE *tree; +{ + NODE *t1, *t2; + struct tm *tm; + time_t fclock; + char buf[100]; + int ret; + + t1 = force_string(tree_eval(tree->lnode)); + + if (tree->rnode == NULL) /* second arg. missing, default */ + (void) time(&fclock); + else { + t2 = tree_eval(tree->rnode->lnode); + fclock = (time_t) force_number(t2); + free_temp(t2); + } + tm = localtime(&fclock); + + ret = strftime(buf, 100, t1->stptr, tm); + + return tmp_string(buf, ret); +} + +NODE * +do_systime(tree) +NODE *tree; +{ + time_t lclock; + + (void) time(&lclock); + return tmp_number((AWKNUM) lclock); +} + +NODE * +do_system(tree) +NODE *tree; +{ + NODE *tmp; + int ret = 0; + char *cmd; + + (void) flush_io (); /* so output is synchronous with gawk's */ + tmp = tree_eval(tree->lnode); + cmd = force_string(tmp)->stptr; + if (cmd && *cmd) { + ret = system(cmd); + ret = (ret >> 8) & 0xff; + } + free_temp(tmp); + return tmp_number((AWKNUM) ret); +} + +void +do_print(tree) +register NODE *tree; +{ + register NODE *t1; + struct redirect *rp = NULL; + register FILE *fp; + register char *s; + + if (tree->rnode) { + int errflg; /* not used, sigh */ + + rp = redirect(tree->rnode, &errflg); + if (rp) { + fp = rp->fp; + if (!fp) + return; + } else + return; + } else + fp = stdout; + tree = tree->lnode; + while (tree) { + t1 = tree_eval(tree->lnode); + if (t1->flags & NUMBER) { + if (OFMTidx == CONVFMTidx) + (void) force_string(t1); + else { + char buf[100]; + + sprintf(buf, OFMT, t1->numbr); + 
t1 = tmp_string(buf, strlen(buf)); + } + } + efwrite(t1->stptr, sizeof(char), t1->stlen, fp, "print", rp, 0); + free_temp(t1); + tree = tree->rnode; + if (tree) { + s = OFS; + if (OFSlen) + efwrite(s, sizeof(char), OFSlen, fp, "print", rp, 0); + } + } + s = ORS; + if (ORSlen) + efwrite(s, sizeof(char), ORSlen, fp, "print", rp, 1); +} + +NODE * +do_tolower(tree) +NODE *tree; +{ + NODE *t1, *t2; + register char *cp, *cp2; + + t1 = tree_eval(tree->lnode); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = t2->stptr, cp2 = t2->stptr + t2->stlen; cp < cp2; cp++) + if (isupper(*cp)) + *cp = tolower(*cp); + free_temp(t1); + return t2; +} + +NODE * +do_toupper(tree) +NODE *tree; +{ + NODE *t1, *t2; + register char *cp; + + t1 = tree_eval(tree->lnode); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = t2->stptr; cp < t2->stptr + t2->stlen; cp++) + if (islower(*cp)) + *cp = toupper(*cp); + free_temp(t1); + return t2; +} + +NODE * +do_atan2(tree) +NODE *tree; +{ + NODE *t1, *t2; + extern double atan2 P((double, double)); + double d1, d2; + + t1 = tree_eval(tree->lnode); + t2 = tree_eval(tree->rnode->lnode); + d1 = force_number(t1); + d2 = force_number(t2); + free_temp(t1); + free_temp(t2); + return tmp_number((AWKNUM) atan2(d1, d2)); +} + +NODE * +do_sin(tree) +NODE *tree; +{ + NODE *tmp; + extern double sin P((double)); + double d; + + tmp = tree_eval(tree->lnode); + d = sin((double)force_number(tmp)); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +NODE * +do_cos(tree) +NODE *tree; +{ + NODE *tmp; + extern double cos P((double)); + double d; + + tmp = tree_eval(tree->lnode); + d = cos((double)force_number(tmp)); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +static int firstrand = 1; +static char state[256]; + +/* ARGSUSED */ +NODE * +do_rand(tree) +NODE *tree; +{ + if (firstrand) { + (void) initstate((unsigned) 1, state, sizeof state); + srandom(1); + firstrand = 0; + } + return tmp_number((AWKNUM) 
random() / LONG_MAX); +} + +NODE * +do_srand(tree) +NODE *tree; +{ + NODE *tmp; + static long save_seed = 0; + long ret = save_seed; /* SVR4 awk srand returns previous seed */ + + if (firstrand) + (void) initstate((unsigned) 1, state, sizeof state); + else + (void) setstate(state); + + if (!tree) + srandom((int) (save_seed = (long) time((time_t *) 0))); + else { + tmp = tree_eval(tree->lnode); + srandom((int) (save_seed = (long) force_number(tmp))); + free_temp(tmp); + } + firstrand = 0; + return tmp_number((AWKNUM) ret); +} + +NODE * +do_match(tree) +NODE *tree; +{ + NODE *t1; + int rstart; + AWKNUM rlength; + Regexp *rp; + + t1 = force_string(tree_eval(tree->lnode)); + tree = tree->rnode->lnode; + rp = re_update(tree); + rstart = research(rp, t1->stptr, 0, t1->stlen, 1); + if (rstart >= 0) { /* match succeded */ + rstart++; /* 1-based indexing */ + rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); + } else { /* match failed */ + rstart = 0; + rlength = -1.0; + } + free_temp(t1); + unref(RSTART_node->var_value); + RSTART_node->var_value = make_number((AWKNUM) rstart); + unref(RLENGTH_node->var_value); + RLENGTH_node->var_value = make_number(rlength); + return tmp_number((AWKNUM) rstart); +} + +static NODE * +sub_common(tree, global) +NODE *tree; +int global; +{ + register char *scan; + register char *bp, *cp; + char *buf; + int buflen; + register char *matchend; + register int len; + char *matchstart; + char *text; + int textlen; + char *repl; + char *replend; + int repllen; + int sofar; + int ampersands; + int matches = 0; + Regexp *rp; + NODE *s; /* subst. pattern */ + NODE *t; /* string to make sub. 
in; $0 if none given */ + NODE *tmp; + NODE **lhs = &tree; /* value not used -- just different from NULL */ + int priv = 0; + Func_ptr after_assign = NULL; + + tmp = tree->lnode; + rp = re_update(tmp); + + tree = tree->rnode; + s = tree->lnode; + + tree = tree->rnode; + tmp = tree->lnode; + t = force_string(tree_eval(tmp)); + + /* do the search early to avoid work on non-match */ + if (research(rp, t->stptr, 0, t->stlen, 1) == -1 || + (RESTART(rp, t->stptr) > t->stlen) && (matches = 1)) { + free_temp(t); + return tmp_number((AWKNUM) matches); + } + + if (tmp->type == Node_val) + lhs = NULL; + else + lhs = get_lhs(tmp, &after_assign); + t->flags |= STRING; + /* + * create a private copy of the string + */ + if (t->stref > 1 || (t->flags & PERM)) { + unsigned int saveflags; + + saveflags = t->flags; + t->flags &= ~MALLOC; + tmp = dupnode(t); + t->flags = saveflags; + t = tmp; + priv = 1; + } + text = t->stptr; + textlen = t->stlen; + buflen = textlen + 2; + + s = force_string(tree_eval(s)); + repl = s->stptr; + replend = repl + s->stlen; + repllen = replend - repl; + emalloc(buf, char *, buflen, "do_sub"); + ampersands = 0; + for (scan = repl; scan < replend; scan++) { + if (*scan == '&') { + repllen--; + ampersands++; + } else if (*scan == '\\' && (*(scan+1) == '&' || *(scan+1) == '\\')) { + repllen--; + scan++; + } + } + + bp = buf; + for (;;) { + matches++; + matchstart = t->stptr + RESTART(rp, t->stptr); + matchend = t->stptr + REEND(rp, t->stptr); + + /* + * create the result, copying in parts of the original + * string + */ + len = matchstart - text + repllen + + ampersands * (matchend - matchstart); + sofar = bp - buf; + while (buflen - sofar - len - 1 < 0) { + buflen *= 2; + erealloc(buf, char *, buflen, "do_sub"); + bp = buf + sofar; + } + for (scan = text; scan < matchstart; scan++) + *bp++ = *scan; + for (scan = repl; scan < replend; scan++) + if (*scan == '&') + for (cp = matchstart; cp < matchend; cp++) + *bp++ = *cp; + else if (*scan == '\\' && 
(*(scan+1) == '&' || *(scan+1) == '\\')) { + scan++; + *bp++ = *scan; + } else + *bp++ = *scan; + if (global && matchstart == matchend && matchend < text + textlen) { + *bp++ = *matchend; + matchend++; + } + textlen = text + textlen - matchend; + text = matchend; + if (!global || textlen <= 0 || + research(rp, t->stptr, text-t->stptr, textlen, 1) == -1) + break; + } + sofar = bp - buf; + if (buflen - sofar - textlen - 1) { + buflen = sofar + textlen + 2; + erealloc(buf, char *, buflen, "do_sub"); + bp = buf + sofar; + } + for (scan = matchend; scan < text + textlen; scan++) + *bp++ = *scan; + textlen = bp - buf; + free(t->stptr); + t->stptr = buf; + t->stlen = textlen; + + free_temp(s); + if (matches > 0 && lhs) { + if (priv) { + unref(*lhs); + *lhs = t; + } + if (after_assign) + (*after_assign)(); + t->flags &= ~(NUM|NUMBER); + } + return tmp_number((AWKNUM) matches); +} + +NODE * +do_gsub(tree) +NODE *tree; +{ + return sub_common(tree, 1); +} + +NODE * +do_sub(tree) +NODE *tree; +{ + return sub_common(tree, 0); +} + +#ifdef GFMT_WORKAROUND + /* + * printf's %g format [can't rely on gcvt()] + * caveat: don't use as argument to *printf()! 
+ */ +char * +gfmt(g, prec, buf) +double g; /* value to format */ +int prec; /* indicates desired significant digits, not decimal places */ +char *buf; /* return buffer; assumed big enough to hold result */ +{ + if (g == 0.0) { + (void) strcpy(buf, "0"); /* easy special case */ + } else { + register char *d, *e, *p; + + /* start with 'e' format (it'll provide nice exponent) */ + if (prec < 1) prec = 1; /* at least 1 significant digit */ + (void) sprintf(buf, "%.*e", prec - 1, g); + if ((e = strchr(buf, 'e')) != 0) { /* find exponent */ + int exp = atoi(e+1); /* fetch exponent */ + if (exp >= -4 && exp < prec) { /* per K&R2, B1.2 */ + /* switch to 'f' format and re-do */ + prec -= (exp + 1); /* decimal precision */ + (void) sprintf(buf, "%.*f", prec, g); + e = buf + strlen(buf); /* no exponent now: e marks the terminating NUL */ + } + if ((d = strchr(buf, '.')) != 0) { + /* remove trailing zeroes and decimal point */ + for (p = e; p > d && *--p == '0'; ) continue; + if (*p == '.') --p; + if (++p < e) /* copy exponent and NUL */ + while ((*p++ = *e++) != '\0') continue; + } + } + } + return buf; +} +#endif /* GFMT_WORKAROUND */ diff --git a/gnu/usr.bin/awk/config.h b/gnu/usr.bin/awk/config.h new file mode 100644 index 0000000..8c20953 --- /dev/null +++ b/gnu/usr.bin/awk/config.h @@ -0,0 +1,272 @@ +/* + * config.h -- configuration definitions for gawk. + * + * For generic 4.4 alpha + */ + +/* + * Copyright (C) 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file isolates configuration dependencies for gnu awk. + * You should know something about your system, perhaps by having + * a manual handy, when you edit this file. You should copy config.h-dist + * to config.h, and edit config.h. Do not modify config.h-dist, so that + * it will be easy to apply any patches that may be distributed. + * + * The general idea is that systems conforming to the various standards + * should need to do the least amount of changing. Defining the various + * items in this file usually means that your system is missing that + * particular feature. + * + * The order of preference in standard conformance is ANSI C, POSIX, + * and the SVID. + * + * If you have no clue as to what's going on with your system, try + * compiling gawk without editing this file and see what shows up + * missing in the link stage. From there, you can probably figure out + * which defines to turn on. + */ + +/**************************/ +/* Miscellaneous features */ +/**************************/ + +/* + * BLKSIZE_MISSING + * + * Check your /usr/include/sys/stat.h file. If the stat structure + * does not have a member named st_blksize, define this. (This will + * most likely be the case on most System V systems prior to V.4.) + */ +/* #define BLKSIZE_MISSING 1 */ + +/* + * SIGTYPE + * + * The return type of the routines passed to the signal function. + * Modern systems use `void', older systems use `int'. + * If left undefined, it will default to void.
+ */ +/* #define SIGTYPE int */ + +/* + * SIZE_T_MISSING + * + * If your system has no typedef for size_t, define this to get a default + */ +/* #define SIZE_T_MISSING 1 */ + +/* + * CHAR_UNSIGNED + * + * If your machine uses unsigned characters (IBM RT and RS/6000 and others) + * then define this for use in regex.c + */ +/* #define CHAR_UNSIGNED 1 */ + +/* + * HAVE_UNDERSCORE_SETJMP + * + * Check in your /usr/include/setjmp.h file. If there are routines + * there named _setjmp and _longjmp, then you should define this. + * Typically only systems derived from Berkeley Unix have this. + */ +#define HAVE_UNDERSCORE_SETJMP 1 + +/***********************************************/ +/* Missing library subroutines or system calls */ +/***********************************************/ + +/* + * MEMCMP_MISSING + * MEMCPY_MISSING + * MEMSET_MISSING + * + * These three routines are for manipulating blocks of memory. Most + * likely they will either all three be present or all three be missing, + * so they're grouped together. + */ +/* #define MEMCMP_MISSING 1 */ +/* #define MEMCPY_MISSING 1 */ +/* #define MEMSET_MISSING 1 */ + +/* + * RANDOM_MISSING + * + * Your system does not have the random(3) suite of random number + * generating routines. These are different than the old rand(3) + * routines! + */ +/* #define RANDOM_MISSING 1 */ + +/* + * STRCASE_MISSING + * + * Your system does not have the strcasecmp() and strncasecmp() + * routines that originated in Berkeley Unix. + */ +/* #define STRCASE_MISSING 1 */ + +/* + * STRCHR_MISSING + * + * Your system does not have the strchr() and strrchr() functions. + */ +/* #define STRCHR_MISSING 1 */ + +/* + * STRERROR_MISSING + * + * Your system lacks the ANSI C strerror() routine for returning the + * strings associated with errno values. + */ +/* #define STRERROR_MISSING 1 */ + +/* + * STRTOD_MISSING + * + * Your system does not have the strtod() routine for converting + * strings to double precision floating point values. 
+ */ +/* #define STRTOD_MISSING 1 */ + +/* + * STRFTIME_MISSING + * + * Your system lacks the ANSI C strftime() routine for formatting + * broken down time values. + */ +/* #define STRFTIME_MISSING 1 */ + +/* + * TZSET_MISSING + * + * If you have a 4.2 BSD vintage system, then the strftime() routine + * supplied in the missing directory won't be enough, because it relies on the + * tzset() routine from System V / Posix. Fortunately, there is an + * emulation for tzset() too that should do the trick. If you don't + * have tzset(), define this. + */ +/* #define TZSET_MISSING 1 */ + +/* + * TZNAME_MISSING + * + * Some systems do not support the external variables tzname and daylight. + * If this is the case *and* strftime() is missing, define this. + */ +/* #define TZNAME_MISSING 1 */ + +/* + * STDC_HEADERS + * + * If your system does have ANSI compliant header files that + * provide prototypes for library routines, then define this. + */ +#define STDC_HEADERS 1 + +/* + * NO_TOKEN_PASTING + * + * If your compiler define's __STDC__ but does not support token + * pasting (tok##tok), then define this. + */ +/* #define NO_TOKEN_PASTING 1 */ + +/*****************************************************************/ +/* Stuff related to the Standard I/O Library. */ +/*****************************************************************/ +/* Much of this is (still, unfortunately) black magic in nature. */ +/* You may have to use some or all of these together to get gawk */ +/* to work correctly. */ +/*****************************************************************/ + +/* + * NON_STD_SPRINTF + * + * Look in your /usr/include/stdio.h file. If the return type of the + * sprintf() function is NOT `int', define this. + */ +/* #define NON_STD_SPRINTF 1 */ + +/* + * VPRINTF_MISSING + * + * Define this if your system lacks vprintf() and the other routines + * that go with it. This will trigger an attempt to use _doprnt(). 
+ * If you don't have that, this attempt will fail and you are on your own. + */ +/* #define VPRINTF_MISSING 1 */ + +/* + * Casts from size_t to int and back. These will become unnecessary + * at some point in the future, but for now are required where the + * two types are a different representation. + */ +/* #define SZTC */ +/* #define INTC */ + +/* + * SYSTEM_MISSING + * + * Define this if your library does not provide a system function + * or you are not entirely happy with it and would rather use + * a provided replacement (atari only). + */ +/* #define SYSTEM_MISSING 1 */ + +/* + * FMOD_MISSING + * + * Define this if your system lacks the fmod() function and modf() will + * be used instead. + */ +/* #define FMOD_MISSING 1 */ + + +/*******************************/ +/* Gawk configuration options. */ +/*******************************/ + +/* + * DEFPATH + * + * The default search path for the -f option of gawk. It is used + * if the AWKPATH environment variable is undefined. The default + * definition is provided here. Most likely you should not change + * this. + */ + +/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */ +/* #define ENVSEP ':' */ + +/* + * alloca already has a prototype defined - don't redefine it + */ +#define ALLOCA_PROTO 1 + +/* + * srandom already has a prototype defined - don't redefine it + */ +#define SRANDOM_PROTO 1 + +/* anything that follows is for system-specific short-term kludges */ diff --git a/gnu/usr.bin/awk/dfa.c b/gnu/usr.bin/awk/dfa.c new file mode 100644 index 0000000..5293c75 --- /dev/null +++ b/gnu/usr.bin/awk/dfa.c @@ -0,0 +1,2291 @@ +/* dfa.c - deterministic extended regexp routines for GNU + Copyright (C) 1988 Free Software Foundation, Inc. + Written June, 1988 by Mike Haertel + Modified July, 1988 by Arthur David Olson + to assist BMG speedups + + NO WARRANTY + + BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY +NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. 
EXCEPT +WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, +RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS" +WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY +AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M. +STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY +WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE +LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR +OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR +DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR +A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS +PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. + + GENERAL PUBLIC LICENSE TO COPY + + 1. You may copy and distribute verbatim copies of this source file +as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy a valid copyright notice "Copyright + (C) 1988 Free Software Foundation, Inc."; and include following the +copyright notice a verbatim copy of the above disclaimer of warranty +and of this License. You may charge a distribution fee for the +physical act of transferring a copy. + + 2. 
You may modify your copy or copies of this source file or +any portion of it, and copy and distribute such modifications under +the terms of Paragraph 1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating + that you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, + that in whole or in part contains or is a derivative of this + program or any part thereof, to be licensed at no charge to all + third parties on terms identical to those contained in this + License Agreement (except that you may choose to grant more extensive + warranty protection to some or all third parties, at your option). + + c) You may charge a distribution fee for the physical act of + transferring a copy, and you may at your option offer warranty + protection in exchange for a fee. + +Mere aggregation of another unrelated program with this program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other program under the scope of these terms. + + 3. You may copy and distribute this program or any portion of it in +compiled, executable or object code form under the terms of Paragraphs +1 and 2 above provided that you do the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal + shipping charge) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) 
+ +For an executable file, complete source code means all the source code for +all modules it contains; but, as a special exception, it need not include +source code for modules which are standard libraries that accompany the +operating system on which the executable file runs. + + 4. You may not copy, sublicense, distribute or transfer this program +except as expressly provided under this License Agreement. Any attempt +otherwise to copy, sublicense, distribute or transfer this program is void and +your rights to use the program under this License agreement shall be +automatically terminated. However, parties who have received computer +software programs from you with this License Agreement will not have +their licenses terminated so long as such parties remain in full compliance. + + 5. If you wish to incorporate parts of this program into other free +programs whose distribution conditions are different, write to the Free +Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet +worked out a simple rule that can be stated here, but we will often permit +this. We will be guided by the two goals of preserving the free status of +all derivatives our free software and of promoting the sharing and reuse of +software. + + +In other words, you are welcome to use, share and improve this program. +You are forbidden to forbid anyone else to use, share and improve +what you give them. Help stamp out software-hoarding! 
*/ + +#include "awk.h" +#include <assert.h> + +#ifdef setbit /* surprise - setbit and clrbit are macros on NeXT */ +#undef setbit +#endif +#ifdef clrbit +#undef clrbit +#endif + +#ifdef __STDC__ +typedef void *ptr_t; +#else +typedef char *ptr_t; +#endif + +typedef struct { + char ** in; + char * left; + char * right; + char * is; +} must; + +static ptr_t xcalloc P((int n, size_t s)); +static ptr_t xmalloc P((size_t n)); +static ptr_t xrealloc P((ptr_t p, size_t n)); +static int tstbit P((int b, _charset c)); +static void setbit P((int b, _charset c)); +static void clrbit P((int b, _charset c)); +static void copyset P((const _charset src, _charset dst)); +static void zeroset P((_charset s)); +static void notset P((_charset s)); +static int equal P((const _charset s1, const _charset s2)); +static int charset_index P((const _charset s)); +static _token lex P((void)); +static void addtok P((_token t)); +static void atom P((void)); +static void closure P((void)); +static void branch P((void)); +static void regexp P((void)); +static void copy P((const _position_set *src, _position_set *dst)); +static void insert P((_position p, _position_set *s)); +static void merge P((_position_set *s1, _position_set *s2, _position_set *m)); +static void delete P((_position p, _position_set *s)); +static int state_index P((struct regexp *r, _position_set *s, + int newline, int letter)); +static void epsclosure P((_position_set *s, struct regexp *r)); +static void build_state P((int s, struct regexp *r)); +static void build_state_zero P((struct regexp *r)); +static char *icatalloc P((char *old, const char *new)); +static char *icpyalloc P((const char *string)); +static char *istrstr P((char *lookin, char *lookfor)); +static void ifree P((char *cp)); +static void freelist P((char **cpp)); +static char **enlist P((char **cpp, char *new, size_t len)); +static char **comsubs P((char *left, char *right)); +static char **addlists P((char **old, char **new)); +static char **inboth P((char 
**left, char **right)); +static void resetmust P((must *mp)); +static void regmust P((struct regexp *r)); + +#undef P + +static ptr_t +xcalloc(n, s) + int n; + size_t s; +{ + ptr_t r = calloc(n, s); + + if (NULL == r) + reg_error("Memory exhausted"); /* reg_error does not return */ + return r; +} + +static ptr_t +xmalloc(n) + size_t n; +{ + ptr_t r = malloc(n); + + assert(n != 0); + if (NULL == r) + reg_error("Memory exhausted"); + return r; +} + +static ptr_t +xrealloc(p, n) + ptr_t p; + size_t n; +{ + ptr_t r = realloc(p, n); + + assert(n != 0); + if (NULL == r) + reg_error("Memory exhausted"); + return r; +} + +#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t))) +#undef MALLOC +#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) +#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) + +/* Reallocate an array of type t if nalloc is too small for index. */ +#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ + if ((index) >= (nalloc)) \ + { \ + while ((index) >= (nalloc)) \ + (nalloc) *= 2; \ + REALLOC(p, t, nalloc); \ + } + +/* Stuff pertaining to charsets. 
*/ + +static int +tstbit(b, c) + int b; + _charset c; +{ + return c[b / INTBITS] & 1 << b % INTBITS; +} + +static void +setbit(b, c) + int b; + _charset c; +{ + c[b / INTBITS] |= 1 << b % INTBITS; +} + +static void +clrbit(b, c) + int b; + _charset c; +{ + c[b / INTBITS] &= ~(1 << b % INTBITS); +} + +static void +copyset(src, dst) + const _charset src; + _charset dst; +{ + int i; + + for (i = 0; i < _CHARSET_INTS; ++i) + dst[i] = src[i]; +} + +static void +zeroset(s) + _charset s; +{ + int i; + + for (i = 0; i < _CHARSET_INTS; ++i) + s[i] = 0; +} + +static void +notset(s) + _charset s; +{ + int i; + + for (i = 0; i < _CHARSET_INTS; ++i) + s[i] = ~s[i]; +} + +static int +equal(s1, s2) + const _charset s1; + const _charset s2; +{ + int i; + + for (i = 0; i < _CHARSET_INTS; ++i) + if (s1[i] != s2[i]) + return 0; + return 1; +} + +/* A pointer to the current regexp is kept here during parsing. */ +static struct regexp *reg; + +/* Find the index of charset s in reg->charsets, or allocate a new charset. */ +static int +charset_index(s) + const _charset s; +{ + int i; + + for (i = 0; i < reg->cindex; ++i) + if (equal(s, reg->charsets[i])) + return i; + REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex); + ++reg->cindex; + copyset(s, reg->charsets[i]); + return i; +} + +/* Syntax bits controlling the behavior of the lexical analyzer. */ +static syntax_bits, syntax_bits_set; + +/* Flag for case-folding letters into sets. */ +static case_fold; + +/* Entry point to set syntax options. */ +void +regsyntax(bits, fold) + long bits; + int fold; +{ + syntax_bits_set = 1; + syntax_bits = bits; + case_fold = fold; +} + +/* Lexical analyzer. */ +static const char *lexstart; /* Pointer to beginning of input string. */ +static const char *lexptr; /* Pointer to next input character. */ +static lexleft; /* Number of characters remaining. */ +static caret_allowed; /* True if backward context allows ^ + (meaningful only if RE_CONTEXT_INDEP_OPS + is turned off). 
*/ +static closure_allowed; /* True if backward context allows closures + (meaningful only if RE_CONTEXT_INDEP_OPS + is turned off). */ + +/* Note that characters become unsigned here. */ +#define FETCH(c, eoferr) \ + { \ + if (! lexleft) \ + if (eoferr != NULL) \ + reg_error(eoferr); \ + else \ + return _END; \ + (c) = (unsigned char) *lexptr++; \ + --lexleft; \ + } + +static _token +lex() +{ + _token c, c2; + int invert; + _charset cset; + + FETCH(c, (char *) 0); + switch (c) + { + case '^': + if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) + && (!caret_allowed || + ((syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))) + goto normal_char; + caret_allowed = 0; + return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE; + + case '$': + if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft + || (! (syntax_bits & RE_TIGHT_VBAR) + && ((syntax_bits & RE_NO_BK_PARENS + ? lexleft > 0 && *lexptr == ')' + : lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')') + || (syntax_bits & RE_NO_BK_VBAR + ? lexleft > 0 && *lexptr == '|' + : lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|')))) + return syntax_bits & RE_TIGHT_VBAR ? 
_ALLENDLINE : _ENDLINE; + goto normal_char; + + case '\\': + FETCH(c, "Unfinished \\ quote"); + switch (c) + { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + caret_allowed = 0; + closure_allowed = 1; + return _BACKREF; + + case '<': + caret_allowed = 0; + return _BEGWORD; + + case '>': + caret_allowed = 0; + return _ENDWORD; + + case 'b': + caret_allowed = 0; + return _LIMWORD; + + case 'B': + caret_allowed = 0; + return _NOTLIMWORD; + + case 'w': + case 'W': + zeroset(cset); + for (c2 = 0; c2 < _NOTCHAR; ++c2) + if (ISALNUM(c2)) + setbit(c2, cset); + if (c == 'W') + notset(cset); + caret_allowed = 0; + closure_allowed = 1; + return _SET + charset_index(cset); + + case '?': + if (syntax_bits & RE_BK_PLUS_QM) + goto qmark; + goto normal_char; + + case '+': + if (syntax_bits & RE_BK_PLUS_QM) + goto plus; + goto normal_char; + + case '|': + if (! (syntax_bits & RE_NO_BK_VBAR)) + goto or; + goto normal_char; + + case '(': + if (! (syntax_bits & RE_NO_BK_PARENS)) + goto lparen; + goto normal_char; + + case ')': + if (! (syntax_bits & RE_NO_BK_PARENS)) + goto rparen; + goto normal_char; + + default: + goto normal_char; + } + + case '?': + if (syntax_bits & RE_BK_PLUS_QM) + goto normal_char; + qmark: + if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) + goto normal_char; + return _QMARK; + + case '*': + if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) + goto normal_char; + return _STAR; + + case '+': + if (syntax_bits & RE_BK_PLUS_QM) + goto normal_char; + plus: + if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) + goto normal_char; + return _PLUS; + + case '|': + if (! (syntax_bits & RE_NO_BK_VBAR)) + goto normal_char; + or: + caret_allowed = 1; + closure_allowed = 0; + return _OR; + + case '\n': + if (! (syntax_bits & RE_NEWLINE_OR)) + goto normal_char; + goto or; + + case '(': + if (! 
(syntax_bits & RE_NO_BK_PARENS)) + goto normal_char; + lparen: + caret_allowed = 1; + closure_allowed = 0; + return _LPAREN; + + case ')': + if (! (syntax_bits & RE_NO_BK_PARENS)) + goto normal_char; + rparen: + caret_allowed = 0; + closure_allowed = 1; + return _RPAREN; + + case '.': + zeroset(cset); + notset(cset); + clrbit('\n', cset); + caret_allowed = 0; + closure_allowed = 1; + return _SET + charset_index(cset); + + case '[': + zeroset(cset); + FETCH(c, "Unbalanced ["); + if (c == '^') + { + FETCH(c, "Unbalanced ["); + invert = 1; + } + else + invert = 0; + do + { + FETCH(c2, "Unbalanced ["); + if ((syntax_bits & RE_AWK_CLASS_HACK) && c == '\\') + { + c = c2; + FETCH(c2, "Unbalanced ["); + } + if (c2 == '-') + { + FETCH(c2, "Unbalanced ["); + if (c2 == ']' && (syntax_bits & RE_AWK_CLASS_HACK)) + { + setbit(c, cset); + setbit('-', cset); + break; + } + while (c <= c2) + setbit(c++, cset); + FETCH(c, "Unbalanced ["); + } + else + { + setbit(c, cset); + c = c2; + } + } + while (c != ']'); + if (invert) + notset(cset); + caret_allowed = 0; + closure_allowed = 1; + return _SET + charset_index(cset); + + default: + normal_char: + caret_allowed = 0; + closure_allowed = 1; + if (case_fold && ISALPHA(c)) + { + zeroset(cset); + if (isupper(c)) + c = tolower(c); + setbit(c, cset); + setbit(toupper(c), cset); + return _SET + charset_index(cset); + } + return c; + } +} + +/* Recursive descent parser for regular expressions. */ + +static _token tok; /* Lookahead token. */ +static depth; /* Current depth of a hypothetical stack + holding deferred productions. This is + used to determine the depth that will be + required of the real stack later on in + reganalyze(). */ + +/* Add the given token to the parse tree, maintaining the depth count and + updating the maximum depth if necessary. 
*/ +static void +addtok(t) + _token t; +{ + REALLOC_IF_NECESSARY(reg->tokens, _token, reg->talloc, reg->tindex); + reg->tokens[reg->tindex++] = t; + + switch (t) + { + case _QMARK: + case _STAR: + case _PLUS: + break; + + case _CAT: + case _OR: + --depth; + break; + + default: + ++reg->nleaves; + case _EMPTY: + ++depth; + break; + } + if (depth > reg->depth) + reg->depth = depth; +} + +/* The grammar understood by the parser is as follows. + + start: + regexp + _ALLBEGLINE regexp + regexp _ALLENDLINE + _ALLBEGLINE regexp _ALLENDLINE + + regexp: + regexp _OR branch + branch + + branch: + branch closure + closure + + closure: + closure _QMARK + closure _STAR + closure _PLUS + atom + + atom: + <normal character> + _SET + _BACKREF + _BEGLINE + _ENDLINE + _BEGWORD + _ENDWORD + _LIMWORD + _NOTLIMWORD + <empty> + + The parser builds a parse tree in postfix form in an array of tokens. */ + +#ifdef __STDC__ +static void regexp(void); +#else +static void regexp(); +#endif + +static void +atom() +{ + if (tok >= 0 && (tok < _NOTCHAR || tok >= _SET || tok == _BACKREF + || tok == _BEGLINE || tok == _ENDLINE || tok == _BEGWORD + || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD)) + { + addtok(tok); + tok = lex(); + } + else if (tok == _LPAREN) + { + tok = lex(); + regexp(); + if (tok != _RPAREN) + reg_error("Unbalanced ("); + tok = lex(); + } + else + addtok(_EMPTY); +} + +static void +closure() +{ + atom(); + while (tok == _QMARK || tok == _STAR || tok == _PLUS) + { + addtok(tok); + tok = lex(); + } +} + +static void +branch() +{ + closure(); + while (tok != _RPAREN && tok != _OR && tok != _ALLENDLINE && tok >= 0) + { + closure(); + addtok(_CAT); + } +} + +static void +regexp() +{ + branch(); + while (tok == _OR) + { + tok = lex(); + branch(); + addtok(_OR); + } +} + +/* Main entry point for the parser. S is a string to be parsed, len is the + length of the string, so s can include NUL characters. R is a pointer to + the struct regexp to parse into. 
*/ +void +regparse(s, len, r) + const char *s; + size_t len; + struct regexp *r; +{ + reg = r; + lexstart = lexptr = s; + lexleft = len; + caret_allowed = 1; + closure_allowed = 0; + + if (! syntax_bits_set) + reg_error("No syntax specified"); + + tok = lex(); + depth = r->depth; + + if (tok == _ALLBEGLINE) + { + addtok(_BEGLINE); + tok = lex(); + regexp(); + addtok(_CAT); + } + else + regexp(); + + if (tok == _ALLENDLINE) + { + addtok(_ENDLINE); + addtok(_CAT); + tok = lex(); + } + + if (tok != _END) + reg_error("Unbalanced )"); + + addtok(_END - r->nregexps); + addtok(_CAT); + + if (r->nregexps) + addtok(_OR); + + ++r->nregexps; +} + +/* Some primitives for operating on sets of positions. */ + +/* Copy one set to another; the destination must be large enough. */ +static void +copy(src, dst) + const _position_set *src; + _position_set *dst; +{ + int i; + + for (i = 0; i < src->nelem; ++i) + dst->elems[i] = src->elems[i]; + dst->nelem = src->nelem; +} + +/* Insert a position in a set. Position sets are maintained in sorted + order according to index. If position already exists in the set with + the same index then their constraints are logically or'd together. + S->elems must point to an array large enough to hold the resulting set. */ +static void +insert(p, s) + _position p; + _position_set *s; +{ + int i; + _position t1, t2; + + for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i) + ; + if (i < s->nelem && p.index == s->elems[i].index) + s->elems[i].constraint |= p.constraint; + else + { + t1 = p; + ++s->nelem; + while (i < s->nelem) + { + t2 = s->elems[i]; + s->elems[i++] = t1; + t1 = t2; + } + } +} + +/* Merge two sets of positions into a third. The result is exactly as if + the positions of both sets were inserted into an initially empty set. 
*/ +static void +merge(s1, s2, m) + _position_set *s1; + _position_set *s2; + _position_set *m; +{ + int i = 0, j = 0; + + m->nelem = 0; + while (i < s1->nelem && j < s2->nelem) + if (s1->elems[i].index > s2->elems[j].index) + m->elems[m->nelem++] = s1->elems[i++]; + else if (s1->elems[i].index < s2->elems[j].index) + m->elems[m->nelem++] = s2->elems[j++]; + else + { + m->elems[m->nelem] = s1->elems[i++]; + m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; + } + while (i < s1->nelem) + m->elems[m->nelem++] = s1->elems[i++]; + while (j < s2->nelem) + m->elems[m->nelem++] = s2->elems[j++]; +} + +/* Delete a position from a set. */ +static void +delete(p, s) + _position p; + _position_set *s; +{ + int i; + + for (i = 0; i < s->nelem; ++i) + if (p.index == s->elems[i].index) + break; + if (i < s->nelem) + for (--s->nelem; i < s->nelem; ++i) + s->elems[i] = s->elems[i + 1]; +} + +/* Find the index of the state corresponding to the given position set with + the given preceding context, or create a new state if there is no such + state. Newline and letter tell whether we got here on a newline or + letter, respectively. */ +static int +state_index(r, s, newline, letter) + struct regexp *r; + _position_set *s; + int newline; + int letter; +{ + int lhash = 0; + int constraint; + int i, j; + + newline = newline ? 1 : 0; + letter = letter ? 1 : 0; + + for (i = 0; i < s->nelem; ++i) + lhash ^= s->elems[i].index + s->elems[i].constraint; + + /* Try to find a state that exactly matches the proposed one. */ + for (i = 0; i < r->sindex; ++i) + { + if (lhash != r->states[i].hash || s->nelem != r->states[i].elems.nelem + || newline != r->states[i].newline || letter != r->states[i].letter) + continue; + for (j = 0; j < s->nelem; ++j) + if (s->elems[j].constraint + != r->states[i].elems.elems[j].constraint + || s->elems[j].index != r->states[i].elems.elems[j].index) + break; + if (j == s->nelem) + return i; + } + + /* We'll have to create a new state. 
*/ + REALLOC_IF_NECESSARY(r->states, _dfa_state, r->salloc, r->sindex); + r->states[i].hash = lhash; + MALLOC(r->states[i].elems.elems, _position, s->nelem); + copy(s, &r->states[i].elems); + r->states[i].newline = newline; + r->states[i].letter = letter; + r->states[i].backref = 0; + r->states[i].constraint = 0; + r->states[i].first_end = 0; + for (j = 0; j < s->nelem; ++j) + if (r->tokens[s->elems[j].index] < 0) + { + constraint = s->elems[j].constraint; + if (_SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0) + || _SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1) + || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0) + || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1)) + r->states[i].constraint |= constraint; + if (! r->states[i].first_end) + r->states[i].first_end = r->tokens[s->elems[j].index]; + } + else if (r->tokens[s->elems[j].index] == _BACKREF) + { + r->states[i].constraint = _NO_CONSTRAINT; + r->states[i].backref = 1; + } + + ++r->sindex; + + return i; +} + +/* Find the epsilon closure of a set of positions. If any position of the set + contains a symbol that matches the empty string in some context, replace + that position with the elements of its follow labeled with an appropriate + constraint. Repeat exhaustively until no funny positions are left. + S->elems must be large enough to hold the result. 
*/ +static void +epsclosure(s, r) + _position_set *s; + struct regexp *r; +{ + int i, j; + int *visited; + _position p, old; + + MALLOC(visited, int, r->tindex); + for (i = 0; i < r->tindex; ++i) + visited[i] = 0; + + for (i = 0; i < s->nelem; ++i) + if (r->tokens[s->elems[i].index] >= _NOTCHAR + && r->tokens[s->elems[i].index] != _BACKREF + && r->tokens[s->elems[i].index] < _SET) + { + old = s->elems[i]; + p.constraint = old.constraint; + delete(s->elems[i], s); + if (visited[old.index]) + { + --i; + continue; + } + visited[old.index] = 1; + switch (r->tokens[old.index]) + { + case _BEGLINE: + p.constraint &= _BEGLINE_CONSTRAINT; + break; + case _ENDLINE: + p.constraint &= _ENDLINE_CONSTRAINT; + break; + case _BEGWORD: + p.constraint &= _BEGWORD_CONSTRAINT; + break; + case _ENDWORD: + p.constraint &= _ENDWORD_CONSTRAINT; + break; + case _LIMWORD: + p.constraint &= _ENDWORD_CONSTRAINT; + break; + case _NOTLIMWORD: + p.constraint &= _NOTLIMWORD_CONSTRAINT; + break; + default: + break; + } + for (j = 0; j < r->follows[old.index].nelem; ++j) + { + p.index = r->follows[old.index].elems[j].index; + insert(p, s); + } + /* Force rescan to start at the beginning. */ + i = -1; + } + + free(visited); +} + +/* Perform bottom-up analysis on the parse tree, computing various functions. + Note that at this point, we're pretending constructs like \< are real + characters rather than constraints on what can follow them. + + Nullable: A node is nullable if it is at the root of a regexp that can + match the empty string. + * _EMPTY leaves are nullable. + * No other leaf is nullable. + * A _QMARK or _STAR node is nullable. + * A _PLUS node is nullable if its argument is nullable. + * A _CAT node is nullable if both its arguments are nullable. + * An _OR node is nullable if either argument is nullable. + + Firstpos: The firstpos of a node is the set of positions (nonempty leaves) + that could correspond to the first character of a string matching the + regexp rooted at the given node. 
+ * _EMPTY leaves have empty firstpos. + * The firstpos of a nonempty leaf is that leaf itself. + * The firstpos of a _QMARK, _STAR, or _PLUS node is the firstpos of its + argument. + * The firstpos of a _CAT node is the firstpos of the left argument, union + the firstpos of the right if the left argument is nullable. + * The firstpos of an _OR node is the union of firstpos of each argument. + + Lastpos: The lastpos of a node is the set of positions that could + correspond to the last character of a string matching the regexp at + the given node. + * _EMPTY leaves have empty lastpos. + * The lastpos of a nonempty leaf is that leaf itself. + * The lastpos of a _QMARK, _STAR, or _PLUS node is the lastpos of its + argument. + * The lastpos of a _CAT node is the lastpos of its right argument, union + the lastpos of the left if the right argument is nullable. + * The lastpos of an _OR node is the union of the lastpos of each argument. + + Follow: The follow of a position is the set of positions that could + correspond to the character following a character matching the node in + a string matching the regexp. At this point we consider special symbols + that match the empty string in some context to be just normal characters. + Later, if we find that a special symbol is in a follow set, we will + replace it with the elements of its follow, labeled with an appropriate + constraint. + * Every node in the firstpos of the argument of a _STAR or _PLUS node is in + the follow of every node in the lastpos. + * Every node in the firstpos of the second argument of a _CAT node is in + the follow of every node in the lastpos of the first argument. + + Because of the postfix representation of the parse tree, the depth-first + analysis is conveniently done by a linear scan with the aid of a stack. 
+ Sets are stored as arrays of the elements, obeying a stack-like allocation + scheme; the number of elements in each set deeper in the stack can be + used to determine the address of a particular set's array. */ +void +reganalyze(r, searchflag) + struct regexp *r; + int searchflag; +{ + int *nullable; /* Nullable stack. */ + int *nfirstpos; /* Element count stack for firstpos sets. */ + _position *firstpos; /* Array where firstpos elements are stored. */ + int *nlastpos; /* Element count stack for lastpos sets. */ + _position *lastpos; /* Array where lastpos elements are stored. */ + int *nalloc; /* Sizes of arrays allocated to follow sets. */ + _position_set tmp; /* Temporary set for merging sets. */ + _position_set merged; /* Result of merging sets. */ + int wants_newline; /* True if some position wants newline info. */ + int *o_nullable; + int *o_nfirst, *o_nlast; + _position *o_firstpos, *o_lastpos; + int i, j; + _position *pos; + + r->searchflag = searchflag; + + MALLOC(nullable, int, r->depth); + o_nullable = nullable; + MALLOC(nfirstpos, int, r->depth); + o_nfirst = nfirstpos; + MALLOC(firstpos, _position, r->nleaves); + o_firstpos = firstpos, firstpos += r->nleaves; + MALLOC(nlastpos, int, r->depth); + o_nlast = nlastpos; + MALLOC(lastpos, _position, r->nleaves); + o_lastpos = lastpos, lastpos += r->nleaves; + MALLOC(nalloc, int, r->tindex); + for (i = 0; i < r->tindex; ++i) + nalloc[i] = 0; + MALLOC(merged.elems, _position, r->nleaves); + + CALLOC(r->follows, _position_set, r->tindex); + + for (i = 0; i < r->tindex; ++i) + switch (r->tokens[i]) + { + case _EMPTY: + /* The empty set is nullable. */ + *nullable++ = 1; + + /* The firstpos and lastpos of the empty leaf are both empty. */ + *nfirstpos++ = *nlastpos++ = 0; + break; + + case _STAR: + case _PLUS: + /* Every element in the firstpos of the argument is in the follow + of every element in the lastpos.
*/ + tmp.nelem = nfirstpos[-1]; + tmp.elems = firstpos; + pos = lastpos; + for (j = 0; j < nlastpos[-1]; ++j) + { + merge(&tmp, &r->follows[pos[j].index], &merged); + REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position, + nalloc[pos[j].index], merged.nelem - 1); + copy(&merged, &r->follows[pos[j].index]); + } + /* FALLTHROUGH: _STAR and _PLUS continue into the _QMARK case. */ + case _QMARK: + /* A _QMARK or _STAR node is automatically nullable. */ + if (r->tokens[i] != _PLUS) + nullable[-1] = 1; + break; + + case _CAT: + /* Every element in the firstpos of the second argument is in the + follow of every element in the lastpos of the first argument. */ + tmp.nelem = nfirstpos[-1]; + tmp.elems = firstpos; + pos = lastpos + nlastpos[-1]; + for (j = 0; j < nlastpos[-2]; ++j) + { + merge(&tmp, &r->follows[pos[j].index], &merged); + REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position, + nalloc[pos[j].index], merged.nelem - 1); + copy(&merged, &r->follows[pos[j].index]); + } + + /* The firstpos of a _CAT node is the firstpos of the first argument, + union that of the second argument if the first is nullable. */ + if (nullable[-2]) + nfirstpos[-2] += nfirstpos[-1]; + else + firstpos += nfirstpos[-1]; + --nfirstpos; + + /* The lastpos of a _CAT node is the lastpos of the second argument, + union that of the first argument if the second is nullable. */ + if (nullable[-1]) + nlastpos[-2] += nlastpos[-1]; + else + { + pos = lastpos + nlastpos[-2]; + for (j = nlastpos[-1] - 1; j >= 0; --j) + pos[j] = lastpos[j]; + lastpos += nlastpos[-2]; + nlastpos[-2] = nlastpos[-1]; + } + --nlastpos; + + /* A _CAT node is nullable if both arguments are nullable. */ + nullable[-2] = nullable[-1] && nullable[-2]; + --nullable; + break; + + case _OR: + /* The firstpos is the union of the firstpos of each argument. */ + nfirstpos[-2] += nfirstpos[-1]; + --nfirstpos; + + /* The lastpos is the union of the lastpos of each argument.
*/ + nlastpos[-2] += nlastpos[-1]; + --nlastpos; + + /* An _OR node is nullable if either argument is nullable. */ + nullable[-2] = nullable[-1] || nullable[-2]; + --nullable; + break; + + default: + /* Anything else is a nonempty position. (Note that special + constructs like \< are treated as nonempty strings here; + an "epsilon closure" effectively makes them nullable later. + Backreferences have to get a real position so we can detect + transitions on them later. But they are nullable.) */ + *nullable++ = r->tokens[i] == _BACKREF; + + /* This position is in its own firstpos and lastpos. */ + *nfirstpos++ = *nlastpos++ = 1; + --firstpos, --lastpos; + firstpos->index = lastpos->index = i; + firstpos->constraint = lastpos->constraint = _NO_CONSTRAINT; + + /* Allocate the follow set for this position. */ + nalloc[i] = 1; + MALLOC(r->follows[i].elems, _position, nalloc[i]); + break; + } + + /* For each follow set that is the follow set of a real position, replace + it with its epsilon closure. */ + for (i = 0; i < r->tindex; ++i) + if (r->tokens[i] < _NOTCHAR || r->tokens[i] == _BACKREF + || r->tokens[i] >= _SET) + { + copy(&r->follows[i], &merged); + epsclosure(&merged, r); + if (r->follows[i].nelem < merged.nelem) + REALLOC(r->follows[i].elems, _position, merged.nelem); + copy(&merged, &r->follows[i]); + } + + /* Get the epsilon closure of the firstpos of the regexp. The result will + be the set of positions of state 0. */ + merged.nelem = 0; + for (i = 0; i < nfirstpos[-1]; ++i) + insert(firstpos[i], &merged); + epsclosure(&merged, r); + + /* Check if any of the positions of state 0 will want newline context. */ + wants_newline = 0; + for (i = 0; i < merged.nelem; ++i) + if (_PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint)) + wants_newline = 1; + + /* Build the initial state.
*/ + r->salloc = 1; + r->sindex = 0; + MALLOC(r->states, _dfa_state, r->salloc); + state_index(r, &merged, wants_newline, 0); + + free(o_nullable); + free(o_nfirst); + free(o_firstpos); + free(o_nlast); + free(o_lastpos); + free(nalloc); + free(merged.elems); +} + +/* Find, for each character, the transition out of state s of r, and store + it in the appropriate slot of trans. + + We divide the positions of s into groups (positions can appear in more + than one group). Each group is labeled with a set of characters that + every position in the group matches (taking into account, if necessary, + preceding context information of s). For each group, find the union + of its elements' follows. This set is the set of positions of the + new state. For each character in the group's label, set the transition + on this character to be to a state corresponding to the set's positions, + and its associated backward context information, if necessary. + + If we are building a searching matcher, we include the positions of state + 0 in every state. + + The collection of groups is constructed by building an equivalence-class + partition of the positions of s. + + For each position, find the set of characters C that it matches. Eliminate + any characters from C that fail on grounds of backward context. + + Search through the groups, looking for a group whose label L has nonempty + intersection with C. If L - C is nonempty, create a new group labeled + L - C and having the same positions as the current group, and set L to + the intersection of L and C. Insert the position in this group, set + C = C - L, and resume scanning. + + If after comparing with every group there are characters remaining in C, + create a new group labeled with the characters of C and insert this + position in that group. */ +void +regstate(s, r, trans) + int s; + struct regexp *r; + int trans[]; +{ + _position_set grps[_NOTCHAR]; /* As many as will ever be needed.
*/ + _charset labels[_NOTCHAR]; /* Labels corresponding to the groups. */ + int ngrps = 0; /* Number of groups actually used. */ + _position pos; /* Current position being considered. */ + _charset matches; /* Set of matching characters. */ + int matchesf; /* True if matches is nonempty. */ + _charset intersect; /* Intersection with some label set. */ + int intersectf; /* True if intersect is nonempty. */ + _charset leftovers; /* Stuff in the label that didn't match. */ + int leftoversf; /* True if leftovers is nonempty. */ + static _charset letters; /* Set of characters considered letters. */ + static _charset newline; /* Set holding just the newline character. */ + _position_set follows; /* Union of the follows of some group. */ + _position_set tmp; /* Temporary space for merging sets. */ + int state; /* New state. */ + int wants_newline; /* New state wants to know newline context. */ + int state_newline; /* New state on a newline transition. */ + int wants_letter; /* New state wants to know letter context. */ + int state_letter; /* New state on a letter transition. */ + static initialized; /* Flag for static initialization. */ + int i, j, k; + + /* Initialize the set of letters, if necessary. */ + if (! initialized) + { + initialized = 1; + for (i = 0; i < _NOTCHAR; ++i) + if (ISALNUM(i)) + setbit(i, letters); + setbit('\n', newline); + } + + zeroset(matches); + + for (i = 0; i < r->states[s].elems.nelem; ++i) + { + pos = r->states[s].elems.elems[i]; + if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR) + setbit(r->tokens[pos.index], matches); + else if (r->tokens[pos.index] >= _SET) + copyset(r->charsets[r->tokens[pos.index] - _SET], matches); + else + continue; + + /* Some characters may need to be eliminated from matches because + they fail in the current context. */ + if (pos.constraint != 0xff) + { + if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint, + r->states[s].newline, 1)) + clrbit('\n', matches); + if (!
_MATCHES_NEWLINE_CONTEXT(pos.constraint, + r->states[s].newline, 0)) + for (j = 0; j < _CHARSET_INTS; ++j) + matches[j] &= newline[j]; + if (! _MATCHES_LETTER_CONTEXT(pos.constraint, + r->states[s].letter, 1)) + for (j = 0; j < _CHARSET_INTS; ++j) + matches[j] &= ~letters[j]; + if (! _MATCHES_LETTER_CONTEXT(pos.constraint, + r->states[s].letter, 0)) + for (j = 0; j < _CHARSET_INTS; ++j) + matches[j] &= letters[j]; + + /* If there are no characters left, there's no point in going on. */ + for (j = 0; j < _CHARSET_INTS && !matches[j]; ++j) + ; + if (j == _CHARSET_INTS) + continue; + } + + for (j = 0; j < ngrps; ++j) + { + /* If matches contains a single character only, and the current + group's label doesn't contain that character, go on to the + next group. */ + if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR + && !tstbit(r->tokens[pos.index], labels[j])) + continue; + + /* Check if this group's label has a nonempty intersection with + matches. */ + intersectf = 0; + for (k = 0; k < _CHARSET_INTS; ++k) + (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0; + if (! intersectf) + continue; + + /* It does; now find the set differences both ways. */ + leftoversf = matchesf = 0; + for (k = 0; k < _CHARSET_INTS; ++k) + { + /* Even an optimizing compiler can't know this for sure. */ + int match = matches[k], label = labels[j][k]; + + (leftovers[k] = ~match & label) ? leftoversf = 1 : 0; + (matches[k] = match & ~label) ? matchesf = 1 : 0; + } + + /* If there were leftovers, create a new group labeled with them. */ + if (leftoversf) + { + copyset(leftovers, labels[ngrps]); + copyset(intersect, labels[j]); + MALLOC(grps[ngrps].elems, _position, r->nleaves); + copy(&grps[j], &grps[ngrps]); + ++ngrps; + } + + /* Put the position in the current group. Note that there is no + reason to call insert() here. */ + grps[j].elems[grps[j].nelem++] = pos; + + /* If every character matching the current position has been + accounted for, we're done. */ + if (!
matchesf) + break; + } + + /* If we've passed the last group, and there are still characters + unaccounted for, then we'll have to create a new group. */ + if (j == ngrps) + { + copyset(matches, labels[ngrps]); + zeroset(matches); + MALLOC(grps[ngrps].elems, _position, r->nleaves); + grps[ngrps].nelem = 1; + grps[ngrps].elems[0] = pos; + ++ngrps; + } + } + + MALLOC(follows.elems, _position, r->nleaves); + MALLOC(tmp.elems, _position, r->nleaves); + + /* If we are a searching matcher, the default transition is to a state + containing the positions of state 0, otherwise the default transition + is to fail miserably. */ + if (r->searchflag) + { + wants_newline = 0; + wants_letter = 0; + for (i = 0; i < r->states[0].elems.nelem; ++i) + { + if (_PREV_NEWLINE_DEPENDENT(r->states[0].elems.elems[i].constraint)) + wants_newline = 1; + if (_PREV_LETTER_DEPENDENT(r->states[0].elems.elems[i].constraint)) + wants_letter = 1; + } + copy(&r->states[0].elems, &follows); + state = state_index(r, &follows, 0, 0); + if (wants_newline) + state_newline = state_index(r, &follows, 1, 0); + else + state_newline = state; + if (wants_letter) + state_letter = state_index(r, &follows, 0, 1); + else + state_letter = state; + for (i = 0; i < _NOTCHAR; ++i) + trans[i] = (ISALNUM(i)) ? state_letter : state ; + trans['\n'] = state_newline; + } + else + for (i = 0; i < _NOTCHAR; ++i) + trans[i] = -1; + + for (i = 0; i < ngrps; ++i) + { + follows.nelem = 0; + + /* Find the union of the follows of the positions of the group. + This is a hideously inefficient loop. Fix it someday. */ + for (j = 0; j < grps[i].nelem; ++j) + for (k = 0; k < r->follows[grps[i].elems[j].index].nelem; ++k) + insert(r->follows[grps[i].elems[j].index].elems[k], &follows); + + /* If we are building a searching matcher, throw in the positions + of state 0 as well.
*/ + if (r->searchflag) + for (j = 0; j < r->states[0].elems.nelem; ++j) + insert(r->states[0].elems.elems[j], &follows); + + /* Find out if the new state will want any context information. */ + wants_newline = 0; + if (tstbit('\n', labels[i])) + for (j = 0; j < follows.nelem; ++j) + if (_PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) + wants_newline = 1; + + wants_letter = 0; + for (j = 0; j < _CHARSET_INTS; ++j) + if (labels[i][j] & letters[j]) + break; + if (j < _CHARSET_INTS) + for (j = 0; j < follows.nelem; ++j) + if (_PREV_LETTER_DEPENDENT(follows.elems[j].constraint)) + wants_letter = 1; + + /* Find the state(s) corresponding to the union of the follows. */ + state = state_index(r, &follows, 0, 0); + if (wants_newline) + state_newline = state_index(r, &follows, 1, 0); + else + state_newline = state; + if (wants_letter) + state_letter = state_index(r, &follows, 0, 1); + else + state_letter = state; + + /* Set the transitions for each character in the current label. */ + for (j = 0; j < _CHARSET_INTS; ++j) + for (k = 0; k < INTBITS; ++k) + if (labels[i][j] & 1 << k) + { + int c = j * INTBITS + k; + + if (c == '\n') + trans[c] = state_newline; + else if (ISALNUM(c)) + trans[c] = state_letter; + else if (c < _NOTCHAR) + trans[c] = state; + } + } + + for (i = 0; i < ngrps; ++i) + free(grps[i].elems); + free(follows.elems); + free(tmp.elems); +} + +/* Some routines for manipulating a compiled regexp's transition tables. + Each state may or may not have a transition table; if it does, and it + is a non-accepting state, then r->trans[state] points to its table. + If it is an accepting state then r->fails[state] points to its table. + If it has no table at all, then r->trans[state] is NULL. + TODO: Improve this comment, get rid of the unnecessary redundancy. */ + +static void +build_state(s, r) + int s; + struct regexp *r; +{ + int *trans; /* The new transition table.
*/ + int i; + + /* Set an upper limit on the number of transition tables that will ever + exist at once. 1024 is arbitrary. The idea is that the frequently + used transition tables will be quickly rebuilt, whereas the ones that + were only needed once or twice will be cleared away. */ + if (r->trcount >= 1024) + { + for (i = 0; i < r->tralloc; ++i) + if (r->trans[i]) + { + free((ptr_t) r->trans[i]); + r->trans[i] = NULL; + } + else if (r->fails[i]) + { + free((ptr_t) r->fails[i]); + r->fails[i] = NULL; + } + r->trcount = 0; + } + + ++r->trcount; + + /* Set up the success bits for this state. */ + r->success[s] = 0; + if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 1, r->states[s].letter, 0, + s, *r)) + r->success[s] |= 4; + if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 1, + s, *r)) + r->success[s] |= 2; + if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 0, + s, *r)) + r->success[s] |= 1; + + MALLOC(trans, int, _NOTCHAR); + regstate(s, r, trans); + + /* Now go through the new transition table, and make sure that the trans + and fail arrays are allocated large enough to hold a pointer for the + largest state mentioned in the table. */ + for (i = 0; i < _NOTCHAR; ++i) + if (trans[i] >= r->tralloc) + { + int oldalloc = r->tralloc; + + while (trans[i] >= r->tralloc) + r->tralloc *= 2; + REALLOC(r->realtrans, int *, r->tralloc + 1); + r->trans = r->realtrans + 1; + REALLOC(r->fails, int *, r->tralloc); + REALLOC(r->success, int, r->tralloc); + REALLOC(r->newlines, int, r->tralloc); + while (oldalloc < r->tralloc) + { + r->trans[oldalloc] = NULL; + r->fails[oldalloc++] = NULL; + } + } + + /* Keep the newline transition in a special place so we can use it as + a sentinel.
*/ + r->newlines[s] = trans['\n']; + trans['\n'] = -1; + + if (ACCEPTING(s, *r)) + r->fails[s] = trans; + else + r->trans[s] = trans; +} + +static void +build_state_zero(r) + struct regexp *r; +{ + r->tralloc = 1; + r->trcount = 0; + CALLOC(r->realtrans, int *, r->tralloc + 1); + r->trans = r->realtrans + 1; + CALLOC(r->fails, int *, r->tralloc); + MALLOC(r->success, int, r->tralloc); + MALLOC(r->newlines, int, r->tralloc); + build_state(0, r); +} + +/* Search through a buffer looking for a match to the given struct regexp. + Find the first occurrence of a string matching the regexp in the buffer, + and the shortest possible version thereof. Return a pointer to the first + character after the match, or NULL if none is found. Begin points to + the beginning of the buffer, and end points to the first character after + its end. We store a newline in *end to act as a sentinel, so end had + better point somewhere valid. Newline is a flag indicating whether to + allow newlines to be in the matching string. If count is non- + NULL it points to a place we're supposed to increment every time we + see a newline. Finally, if backref is non-NULL it points to a place + where we're supposed to store a 1 if backreferencing happened and the + match needs to be verified by a backtracking matcher. Otherwise + we store a 0 in *backref. */ +char * +regexecute(r, begin, end, newline, count, backref) + struct regexp *r; + char *begin; + char *end; + int newline; + int *count; + int *backref; +{ + register s, s1, tmp; /* Current state. */ + register unsigned char *p; /* Current input character. */ + register **trans, *t; /* Copy of r->trans so it can be optimized + into a register. */ + static sbit[_NOTCHAR]; /* Table for anding with r->success. */ + static sbit_init; + + if (! sbit_init) + { + int i; + + sbit_init = 1; + for (i = 0; i < _NOTCHAR; ++i) + sbit[i] = (ISALNUM(i)) ? 2 : 1; + sbit['\n'] = 4; + } + + if (!
r->tralloc) + build_state_zero(r); + + s = s1 = 0; + p = (unsigned char *) begin; + trans = r->trans; + *end = '\n'; + + for (;;) + { + while ((t = trans[s]) != 0) { /* hand-optimized loop */ + s1 = t[*p++]; + if ((t = trans[s1]) == 0) { + tmp = s ; s = s1 ; s1 = tmp ; /* swap */ + break; + } + s = t[*p++]; + } + + if (s >= 0 && p <= (unsigned char *) end && r->fails[s]) + { + if (r->success[s] & sbit[*p]) + { + if (backref) + *backref = (r->states[s].backref != 0); + return (char *) p; + } + + s1 = s; + s = r->fails[s][*p++]; + continue; + } + + /* If the previous character was a newline, count it. */ + if (count && (char *) p <= end && p[-1] == '\n') + ++*count; + + /* Check if we've run off the end of the buffer. */ + if ((char *) p >= end) + return NULL; + + if (s >= 0) + { + build_state(s, r); + trans = r->trans; + continue; + } + + if (p[-1] == '\n' && newline) + { + s = r->newlines[s1]; + continue; + } + + s = 0; + } +} + +/* Initialize the components of a regexp that the other routines don't + initialize for themselves. NOTE(review): realtrans, trans, fails, success + and newlines are not assigned here; they are first set in + build_state_zero, yet reg_free tests some of them. */ +void +reginit(r) + struct regexp *r; +{ + r->calloc = 1; + MALLOC(r->charsets, _charset, r->calloc); + r->cindex = 0; + + r->talloc = 1; + MALLOC(r->tokens, _token, r->talloc); + r->tindex = r->depth = r->nleaves = r->nregexps = 0; + + r->searchflag = 0; + r->tralloc = 0; +} + +/* Parse and analyze a single string of the given length. */ +void +regcompile(s, len, r, searchflag) + const char *s; + size_t len; + struct regexp *r; + int searchflag; +{ + if (case_fold) /* dummy folding in service of regmust() */ + { + char *regcopy; + int i; + + regcopy = malloc(len); + if (!regcopy) + reg_error("out of memory"); + + /* This is a complete kludge and could potentially break + \<letter> escapes . . .
*/ + case_fold = 0; + for (i = 0; i < len; ++i) + if (ISUPPER(s[i])) + regcopy[i] = tolower(s[i]); + else + regcopy[i] = s[i]; + + reginit(r); + r->mustn = 0; + r->must[0] = '\0'; + regparse(regcopy, len, r); + free(regcopy); + regmust(r); + reganalyze(r, searchflag); + case_fold = 1; /* NOTE(review): the reginit() below overwrites r->charsets and r->tokens from the first pass without freeing them. */ + reginit(r); + regparse(s, len, r); + reganalyze(r, searchflag); + } + else + { + reginit(r); + regparse(s, len, r); + regmust(r); + reganalyze(r, searchflag); + } +} + +/* Free the storage held by the components of a regexp. + NOTE(review): r->success, allocated in build_state_zero, is not + released here. */ +void +reg_free(r) + struct regexp *r; +{ + int i; + + free((ptr_t) r->charsets); + free((ptr_t) r->tokens); + for (i = 0; i < r->sindex; ++i) + free((ptr_t) r->states[i].elems.elems); + free((ptr_t) r->states); + for (i = 0; i < r->tindex; ++i) + if (r->follows[i].elems) + free((ptr_t) r->follows[i].elems); + free((ptr_t) r->follows); + for (i = 0; i < r->tralloc; ++i) + if (r->trans[i]) + free((ptr_t) r->trans[i]); + else if (r->fails[i]) + free((ptr_t) r->fails[i]); + if (r->realtrans) + free((ptr_t) r->realtrans); + if (r->fails) + free((ptr_t) r->fails); + if (r->newlines) + free((ptr_t) r->newlines); +} + +/* +Having found the postfix representation of the regular expression, +try to find a long sequence of characters that must appear in any line +containing the r.e. +Finding a "longest" sequence is beyond the scope here; +we take an easy way out and hope for the best. +(Take "(ab|a)b"--please.) 
+ +We do a bottom-up calculation of sequences of characters that must appear +in matches of r.e.'s represented by trees rooted at the nodes of the postfix +representation: + sequences that must appear at the left of the match ("left") + sequences that must appear at the right of the match ("right") + lists of sequences that must appear somewhere in the match ("in") + sequences that must constitute the match ("is") +When we get to the root of the tree, we use one of the longest of its +calculated "in" sequences as our answer.
The sequence we find is returned in +r->must (where "r" is the single argument passed to "regmust"); +the length of the sequence is returned in r->mustn. + +The sequences calculated for the various types of node (in pseudo ANSI c) +are shown below. "p" is the operand of unary operators (and the left-hand +operand of binary operators); "q" is the right-hand operand of binary operators +. +"ZERO" means "a zero-length sequence" below. + +Type left right is in +---- ---- ----- -- -- +char c # c # c # c # c + +SET ZERO ZERO ZERO ZERO + +STAR ZERO ZERO ZERO ZERO + +QMARK ZERO ZERO ZERO ZERO + +PLUS p->left p->right ZERO p->in + +CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus + p->left : q->right : q->is!=ZERO) ? q->in plus + p->is##q->left p->right##q->is p->is##q->is : p->right##q->left + ZERO + +OR longest common longest common (do p->is and substrings common to + leading trailing q->is have same p->in and q->in + (sub)sequence (sub)sequence length and + of p->left of p->right content) ? + and q->left and q->right p->is : NULL + +If there's anything else we recognize in the tree, all four sequences get set +to zero-length sequences. If there's something we don't recognize in the tree, +we just return a zero-length sequence. + +Break ties in favor of infrequent letters (choosing 'zzz' in preference to +'aaa')? + +And. . .is it here or someplace that we might ponder "optimizations" such as + egrep 'psi|epsilon' -> egrep 'psi' + egrep 'pepsi|epsilon' -> egrep 'epsi' + (Yes, we now find "epsi" as a "string + that must occur", but we might also + simplify the *entire* r.e. being sought +) + grep '[c]' -> grep 'c' + grep '(ab|a)b' -> grep 'ab' + grep 'ab*' -> grep 'a' + grep 'a*b' -> grep 'b' +There are several issues: + Is optimization easy (enough)? + + Does optimization actually accomplish anything, + or is the automaton you get from "psi|epsilon" (for example) + the same as the one you get from "psi" (for example)? 
+ + Are optimizable r.e.'s likely to be used in real-life situations + (something like 'ab*' is probably unlikely; something like is + 'psi|epsilon' is likelier)? +*/ + +static char * +icatalloc(old, new) +char * old; +const char * new; +{ + register char * result; + register int oldsize, newsize; + + newsize = (new == NULL) ? 0 : strlen(new); + if (old == NULL) + oldsize = 0; + else if (newsize == 0) + return old; + else oldsize = strlen(old); + if (old == NULL) + result = (char *) malloc(newsize + 1); + else result = (char *) realloc((void *) old, oldsize + newsize + 1); + if (result != NULL && new != NULL) + (void) strcpy(result + oldsize, new); + return result; +} + +static char * +icpyalloc(string) +const char * string; +{ + return icatalloc((char *) NULL, string); +} + +static char * +istrstr(lookin, lookfor) +char * lookin; +register char * lookfor; +{ + register char * cp; + register int len; + + len = strlen(lookfor); + for (cp = lookin; *cp != '\0'; ++cp) + if (strncmp(cp, lookfor, len) == 0) + return cp; + return NULL; +} + +static void +ifree(cp) +char * cp; +{ + if (cp != NULL) + free(cp); +} + +static void +freelist(cpp) +register char ** cpp; +{ + register int i; + + if (cpp == NULL) + return; + for (i = 0; cpp[i] != NULL; ++i) { + free(cpp[i]); + cpp[i] = NULL; + } +} + +static char ** +enlist(cpp, new, len) +register char ** cpp; +register char * new; +#ifdef __STDC__ +size_t len; +#else +int len; +#endif +{ + register int i, j; + + if (cpp == NULL) + return NULL; + if ((new = icpyalloc(new)) == NULL) { + freelist(cpp); + return NULL; + } + new[len] = '\0'; + /* + ** Is there already something in the list that's new (or longer)? + */ + for (i = 0; cpp[i] != NULL; ++i) + if (istrstr(cpp[i], new) != NULL) { + free(new); + return cpp; + } + /* + ** Eliminate any obsoleted strings. 
+ */ + j = 0; + while (cpp[j] != NULL) + if (istrstr(new, cpp[j]) == NULL) + ++j; + else { + free(cpp[j]); + if (--i == j) + break; + cpp[j] = cpp[i]; + } + /* + ** Add the new string. + */ + cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp); + if (cpp == NULL) + return NULL; + cpp[i] = new; + cpp[i + 1] = NULL; + return cpp; +} + +/* +** Given pointers to two strings, +** return a pointer to an allocated list of their distinct common substrings. +** Return NULL if something seems wild. +*/ + +static char ** +comsubs(left, right) +char * left; +char * right; +{ + register char ** cpp; + register char * lcp; + register char * rcp; + register int i, len; + + if (left == NULL || right == NULL) + return NULL; + cpp = (char **) malloc(sizeof *cpp); + if (cpp == NULL) + return NULL; + cpp[0] = NULL; + for (lcp = left; *lcp != '\0'; ++lcp) { + len = 0; + rcp = strchr(right, *lcp); + while (rcp != NULL) { + for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) + ; + if (i > len) + len = i; + rcp = strchr(rcp + 1, *lcp); + } + if (len == 0) + continue; +#ifdef __STDC__ + if ((cpp = enlist(cpp, lcp, (size_t)len)) == NULL) +#else + if ((cpp = enlist(cpp, lcp, len)) == NULL) +#endif + break; + } + return cpp; +} + +static char ** +addlists(old, new) +char ** old; +char ** new; +{ + register int i; + + if (old == NULL || new == NULL) + return NULL; + for (i = 0; new[i] != NULL; ++i) { + old = enlist(old, new[i], strlen(new[i])); + if (old == NULL) + break; + } + return old; +} + +/* +** Given two lists of substrings, +** return a new list giving substrings common to both. 
+*/ + +static char ** +inboth(left, right) +char ** left; +char ** right; +{ + register char ** both; + register char ** temp; + register int lnum, rnum; + + if (left == NULL || right == NULL) + return NULL; + both = (char **) malloc(sizeof *both); + if (both == NULL) + return NULL; + both[0] = NULL; + for (lnum = 0; left[lnum] != NULL; ++lnum) { + for (rnum = 0; right[rnum] != NULL; ++rnum) { + temp = comsubs(left[lnum], right[rnum]); + if (temp == NULL) { + freelist(both); + return NULL; + } + both = addlists(both, temp); + freelist(temp); + if (both == NULL) + return NULL; + } + } + return both; +} + +/* +typedef struct { + char ** in; + char * left; + char * right; + char * is; +} must; + */ +static void +resetmust(mp) +register must * mp; +{ + mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + freelist(mp->in); +} + +static void +regmust(r) +register struct regexp * r; +{ + register must * musts; + register must * mp; + register char * result = ""; + register int ri; + register int i; + register _token t; + static must must0; + + reg->mustn = 0; + reg->must[0] = '\0'; + musts = (must *) malloc((reg->tindex + 1) * sizeof *musts); + if (musts == NULL) + return; + mp = musts; + for (i = 0; i <= reg->tindex; ++i) + mp[i] = must0; + for (i = 0; i <= reg->tindex; ++i) { + mp[i].in = (char **) malloc(sizeof *mp[i].in); + mp[i].left = malloc(2); + mp[i].right = malloc(2); + mp[i].is = malloc(2); + if (mp[i].in == NULL || mp[i].left == NULL || + mp[i].right == NULL || mp[i].is == NULL) + goto done; + mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0'; + mp[i].in[0] = NULL; + } + for (ri = 0; ri < reg->tindex; ++ri) { + switch (t = reg->tokens[ri]) { + case _ALLBEGLINE: + case _ALLENDLINE: + case _LPAREN: + case _RPAREN: + goto done; /* "cannot happen" */ + case _EMPTY: + case _BEGLINE: + case _ENDLINE: + case _BEGWORD: + case _ENDWORD: + case _LIMWORD: + case _NOTLIMWORD: + case _BACKREF: + resetmust(mp); + break; + case _STAR: + case _QMARK: + if (mp <= musts) + goto 
done; /* "cannot happen" */ + --mp; + resetmust(mp); + break; + case _OR: + if (mp < &musts[2]) + goto done; /* "cannot happen" */ + { + register char ** new; + register must * lmp; + register must * rmp; + register int j, ln, rn, n; + + rmp = --mp; + lmp = --mp; + /* Guaranteed to be. Unlikely, but. . . */ + if (strcmp(lmp->is, rmp->is) != 0) + lmp->is[0] = '\0'; + /* Left side--easy */ + i = 0; + while (lmp->left[i] != '\0' && + lmp->left[i] == rmp->left[i]) + ++i; + lmp->left[i] = '\0'; + /* Right side */ + ln = strlen(lmp->right); + rn = strlen(rmp->right); + n = ln; + if (n > rn) + n = rn; + for (i = 0; i < n; ++i) + if (lmp->right[ln - i - 1] != + rmp->right[rn - i - 1]) + break; + for (j = 0; j < i; ++j) + lmp->right[j] = + lmp->right[(ln - i) + j]; + lmp->right[j] = '\0'; + new = inboth(lmp->in, rmp->in); + if (new == NULL) + goto done; + freelist(lmp->in); + free((char *) lmp->in); + lmp->in = new; + } + break; + case _PLUS: + if (mp <= musts) + goto done; /* "cannot happen" */ + --mp; + mp->is[0] = '\0'; + break; + case _END: + if (mp != &musts[1]) + goto done; /* "cannot happen" */ + for (i = 0; musts[0].in[i] != NULL; ++i) + if (strlen(musts[0].in[i]) > strlen(result)) + result = musts[0].in[i]; + goto done; + case _CAT: + if (mp < &musts[2]) + goto done; /* "cannot happen" */ + { + register must * lmp; + register must * rmp; + + rmp = --mp; + lmp = --mp; + /* + ** In. Everything in left, plus everything in + ** right, plus catenation of + ** left's right and right's left. 
+ */ + lmp->in = addlists(lmp->in, rmp->in); + if (lmp->in == NULL) + goto done; + if (lmp->right[0] != '\0' && + rmp->left[0] != '\0') { + register char * tp; + + tp = icpyalloc(lmp->right); + if (tp == NULL) + goto done; + tp = icatalloc(tp, rmp->left); + if (tp == NULL) + goto done; + lmp->in = enlist(lmp->in, tp, + strlen(tp)); + free(tp); + if (lmp->in == NULL) + goto done; + } + /* Left-hand */ + if (lmp->is[0] != '\0') { + lmp->left = icatalloc(lmp->left, + rmp->left); + if (lmp->left == NULL) + goto done; + } + /* Right-hand */ + if (rmp->is[0] == '\0') + lmp->right[0] = '\0'; + lmp->right = icatalloc(lmp->right, rmp->right); + if (lmp->right == NULL) + goto done; + /* Guaranteed to be */ + if (lmp->is[0] != '\0' && rmp->is[0] != '\0') { + lmp->is = icatalloc(lmp->is, rmp->is); + if (lmp->is == NULL) + goto done; + } + } + break; + default: + if (t < _END) { + /* "cannot happen" */ + goto done; + } else if (t == '\0') { + /* not on *my* shift */ + goto done; + } else if (t >= _SET) { + /* easy enough */ + resetmust(mp); + } else { + /* plain character */ + resetmust(mp); + mp->is[0] = mp->left[0] = mp->right[0] = t; + mp->is[1] = mp->left[1] = mp->right[1] = '\0'; + mp->in = enlist(mp->in, mp->is, 1); + if (mp->in == NULL) + goto done; + } + break; + } + ++mp; + } +done: + (void) strncpy(reg->must, result, MUST_MAX - 1); + reg->must[MUST_MAX - 1] = '\0'; + reg->mustn = strlen(reg->must); + mp = musts; + for (i = 0; i <= reg->tindex; ++i) { + freelist(mp[i].in); + ifree((char *) mp[i].in); + ifree(mp[i].left); + ifree(mp[i].right); + ifree(mp[i].is); + } + free((char *) mp); +} diff --git a/gnu/usr.bin/awk/dfa.h b/gnu/usr.bin/awk/dfa.h new file mode 100644 index 0000000..65fc495 --- /dev/null +++ b/gnu/usr.bin/awk/dfa.h @@ -0,0 +1,543 @@ +/* dfa.h - declarations for GNU deterministic regexp compiler + Copyright (C) 1988 Free Software Foundation, Inc. 
+ Written June, 1988 by Mike Haertel + + NO WARRANTY + + BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY +NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT +WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, +RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS" +WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY +AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M. +STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY +WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE +LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR +OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR +DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR +A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS +PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. + + GENERAL PUBLIC LICENSE TO COPY + + 1. You may copy and distribute verbatim copies of this source file +as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy a valid copyright notice "Copyright + (C) 1988 Free Software Foundation, Inc."; and include following the +copyright notice a verbatim copy of the above disclaimer of warranty +and of this License. You may charge a distribution fee for the +physical act of transferring a copy. + + 2. 
You may modify your copy or copies of this source file or +any portion of it, and copy and distribute such modifications under +the terms of Paragraph 1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating + that you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, + that in whole or in part contains or is a derivative of this + program or any part thereof, to be licensed at no charge to all + third parties on terms identical to those contained in this + License Agreement (except that you may choose to grant more extensive + warranty protection to some or all third parties, at your option). + + c) You may charge a distribution fee for the physical act of + transferring a copy, and you may at your option offer warranty + protection in exchange for a fee. + +Mere aggregation of another unrelated program with this program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other program under the scope of these terms. + + 3. You may copy and distribute this program or any portion of it in +compiled, executable or object code form under the terms of Paragraphs +1 and 2 above provided that you do the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal + shipping charge) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) 
+ +For an executable file, complete source code means all the source code for +all modules it contains; but, as a special exception, it need not include +source code for modules which are standard libraries that accompany the +operating system on which the executable file runs. + + 4. You may not copy, sublicense, distribute or transfer this program +except as expressly provided under this License Agreement. Any attempt +otherwise to copy, sublicense, distribute or transfer this program is void and +your rights to use the program under this License agreement shall be +automatically terminated. However, parties who have received computer +software programs from you with this License Agreement will not have +their licenses terminated so long as such parties remain in full compliance. + + 5. If you wish to incorporate parts of this program into other free +programs whose distribution conditions are different, write to the Free +Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet +worked out a simple rule that can be stated here, but we will often permit +this. We will be guided by the two goals of preserving the free status of +all derivatives our free software and of promoting the sharing and reuse of +software. + + +In other words, you are welcome to use, share and improve this program. +You are forbidden to forbid anyone else to use, share and improve +what you give them. Help stamp out software-hoarding! */ + +#ifdef __STDC__ + +#ifdef SOMEDAY +#define ISALNUM(c) isalnum(c) +#define ISALPHA(c) isalpha(c) +#define ISUPPER(c) isupper(c) +#else +#define ISALNUM(c) (isascii(c) && isalnum(c)) +#define ISALPHA(c) (isascii(c) && isalpha(c)) +#define ISUPPER(c) (isascii(c) && isupper(c)) +#endif + +#else /* ! __STDC__ */ + +#define const + +#define ISALNUM(c) (isascii(c) && isalnum(c)) +#define ISALPHA(c) (isascii(c) && isalpha(c)) +#define ISUPPER(c) (isascii(c) && isupper(c)) + +#endif /* ! 
__STDC__ */ + +/* 1 means plain parentheses serve as grouping, and backslash + parentheses are needed for literal searching. + 0 means backslash-parentheses are grouping, and plain parentheses + are for literal searching. */ +#define RE_NO_BK_PARENS 1L + +/* 1 means plain | serves as the "or"-operator, and \| is a literal. + 0 means \| serves as the "or"-operator, and | is a literal. */ +#define RE_NO_BK_VBAR (1L << 1) + +/* 0 means plain + or ? serves as an operator, and \+, \? are literals. + 1 means \+, \? are operators and plain +, ? are literals. */ +#define RE_BK_PLUS_QM (1L << 2) + +/* 1 means | binds tighter than ^ or $. + 0 means the contrary. */ +#define RE_TIGHT_VBAR (1L << 3) + +/* 1 means treat \n as an _OR operator + 0 means treat it as a normal character */ +#define RE_NEWLINE_OR (1L << 4) + +/* 0 means that special characters (such as *, ^, and $) always have + their special meaning regardless of the surrounding context. + 1 means that special characters may act as normal characters in some + contexts. Specifically, this applies to: + ^ - only special at the beginning, or after ( or | + $ - only special at the end, or before ) or | + *, +, ? - only special when not after the beginning, (, or | */ +#define RE_CONTEXT_INDEP_OPS (1L << 5) + +/* 1 means that \ in a character class escapes the next character (typically + a hyphen). It also is overloaded to mean that hyphen at the end of the range + is allowable and means that the hyphen is to be taken literally. */ +#define RE_AWK_CLASS_HACK (1L << 6) + +/* Now define combinations of bits for the standard possibilities. */ +#ifdef notdef +#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) +#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR) +#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) +#define RE_SYNTAX_EMACS 0 +#endif + +/* The NULL pointer. */ +#ifndef NULL +#define NULL 0 +#endif + +/* Number of bits in an unsigned char. 
*/ +#ifndef CHARBITS +#define CHARBITS 8 +#endif + +/* First integer value that is greater than any character code. */ +#define _NOTCHAR (1 << CHARBITS) + +/* INTBITS need not be exact, just a lower bound. */ +#ifndef INTBITS +#define INTBITS (CHARBITS * sizeof (int)) +#endif + +/* Number of ints required to hold a bit for every character. */ +#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS) + +/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ +typedef int _charset[_CHARSET_INTS]; + +/* The regexp is parsed into an array of tokens in postfix form. Some tokens + are operators and others are terminal symbols. Most (but not all) of these + codes are returned by the lexical analyzer. */ +#ifdef __STDC__ + +typedef enum +{ + _END = -1, /* _END is a terminal symbol that matches the + end of input; any value of _END or less in + the parse tree is such a symbol. Accepting + states of the DFA are those that would have + a transition on _END. */ + + /* Ordinary character values are terminal symbols that match themselves. */ + + _EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches + the empty string. */ + + _BACKREF, /* _BACKREF is generated by \<digit>; it + is not completely handled. If the scanner + detects a transition on backref, it returns + a kind of "semi-success" indicating that + the match will have to be verified with + a backtracking matcher. */ + + _BEGLINE, /* _BEGLINE is a terminal symbol that matches + the empty string if it is at the beginning + of a line. */ + + _ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that + matches the empty string if it is at the + beginning of a line; _ALLBEGLINE applies + to the entire regexp and can only occur + as the first token thereof. _ALLBEGLINE + never appears in the parse tree; a _BEGLINE + is prepended with _CAT to the entire + regexp instead. */ + + _ENDLINE, /* _ENDLINE is a terminal symbol that matches + the empty string if it is at the end of + a line. 
*/ + + _ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE + is to _BEGLINE. */ + + _BEGWORD, /* _BEGWORD is a terminal symbol that matches + the empty string if it is at the beginning + of a word. */ + + _ENDWORD, /* _ENDWORD is a terminal symbol that matches + the empty string if it is at the end of + a word. */ + + _LIMWORD, /* _LIMWORD is a terminal symbol that matches + the empty string if it is at the beginning + or the end of a word. */ + + _NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that + matches the empty string if it is not at + the beginning or end of a word. */ + + _QMARK, /* _QMARK is an operator of one argument that + matches zero or one occurrences of its + argument. */ + + _STAR, /* _STAR is an operator of one argument that + matches the Kleene closure (zero or more + occurrences) of its argument. */ + + _PLUS, /* _PLUS is an operator of one argument that + matches the positive closure (one or more + occurrences) of its argument. */ + + _CAT, /* _CAT is an operator of two arguments that + matches the concatenation of its + arguments. _CAT is never returned by the + lexical analyzer. */ + + _OR, /* _OR is an operator of two arguments that + matches either of its arguments. */ + + _LPAREN, /* _LPAREN never appears in the parse tree, + it is only a lexeme. */ + + _RPAREN, /* _RPAREN never appears in the parse tree. */ + + _SET /* _SET (and any value greater) is a + terminal symbol that matches any of a + class of characters. */ +} _token; + +#else /* ! 
__STDC__ */ + +typedef short _token; + +#define _END -1 +#define _EMPTY _NOTCHAR +#define _BACKREF (_EMPTY + 1) +#define _BEGLINE (_EMPTY + 2) +#define _ALLBEGLINE (_EMPTY + 3) +#define _ENDLINE (_EMPTY + 4) +#define _ALLENDLINE (_EMPTY + 5) +#define _BEGWORD (_EMPTY + 6) +#define _ENDWORD (_EMPTY + 7) +#define _LIMWORD (_EMPTY + 8) +#define _NOTLIMWORD (_EMPTY + 9) +#define _QMARK (_EMPTY + 10) +#define _STAR (_EMPTY + 11) +#define _PLUS (_EMPTY + 12) +#define _CAT (_EMPTY + 13) +#define _OR (_EMPTY + 14) +#define _LPAREN (_EMPTY + 15) +#define _RPAREN (_EMPTY + 16) +#define _SET (_EMPTY + 17) + +#endif /* ! __STDC__ */ + +/* Sets are stored in an array in the compiled regexp; the index of the + array corresponding to a given set token is given by _SET_INDEX(t). */ +#define _SET_INDEX(t) ((t) - _SET) + +/* Sometimes characters can only be matched depending on the surrounding + context. Such context decisions depend on what the previous character + was, and the value of the current (lookahead) character. Context + dependent constraints are encoded as 8 bit integers. Each bit that + is set indicates that the constraint succeeds in the corresponding + context. + + bit 7 - previous and current are newlines + bit 6 - previous was newline, current isn't + bit 5 - previous wasn't newline, current is + bit 4 - neither previous nor current is a newline + bit 3 - previous and current are word-constituents + bit 2 - previous was word-constituent, current isn't + bit 1 - previous wasn't word-constituent, current is + bit 0 - neither previous nor current is word-constituent + + Word-constituent characters are those that satisfy isalnum(). + + The macro _SUCCEEDS_IN_CONTEXT determines whether a given constraint + succeeds in a particular context. Prevn is true if the previous character + was a newline, currn is true if the lookahead character is a newline. + Prevl and currl similarly depend upon whether the previous and current + characters are word-constituent letters. 
*/ +#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \ + ((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))) +#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \ + ((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))) +#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \ + (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \ + && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl)) + +/* The following macros give information about what a constraint depends on. */ +#define _PREV_NEWLINE_DEPENDENT(constraint) \ + (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30)) +#define _PREV_LETTER_DEPENDENT(constraint) \ + (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03)) + +/* Tokens that match the empty string subject to some constraint actually + work by applying that constraint to determine what may follow them, + taking into account what has gone before. The following values are + the constraints corresponding to the special tokens previously defined. */ +#define _NO_CONSTRAINT 0xff +#define _BEGLINE_CONSTRAINT 0xcf +#define _ENDLINE_CONSTRAINT 0xaf +#define _BEGWORD_CONSTRAINT 0xf2 +#define _ENDWORD_CONSTRAINT 0xf4 +#define _LIMWORD_CONSTRAINT 0xf6 +#define _NOTLIMWORD_CONSTRAINT 0xf9 + +/* States of the recognizer correspond to sets of positions in the parse + tree, together with the constraints under which they may be matched. + So a position is encoded as an index into the parse tree together with + a constraint. */ +typedef struct +{ + unsigned index; /* Index into the parse array. */ + unsigned constraint; /* Constraint for matching this position. */ +} _position; + +/* Sets of positions are stored as arrays. */ +typedef struct +{ + _position *elems; /* Elements of this position set. */ + int nelem; /* Number of elements in this set. 
*/ +} _position_set; + +/* A state of the regexp consists of a set of positions, some flags, + and the token value of the lowest-numbered position of the state that + contains an _END token. */ +typedef struct +{ + int hash; /* Hash of the positions of this state. */ + _position_set elems; /* Positions this state could match. */ + char newline; /* True if previous state matched newline. */ + char letter; /* True if previous state matched a letter. */ + char backref; /* True if this state matches a \<digit>. */ + unsigned char constraint; /* Constraint for this state to accept. */ + int first_end; /* Token value of the first _END in elems. */ +} _dfa_state; + +/* If an r.e. is at most MUST_MAX characters long, we look for a string which + must appear in it; whatever's found is dropped into the struct reg. */ + +#define MUST_MAX 50 + +/* A compiled regular expression. */ +struct regexp +{ + /* Stuff built by the scanner. */ + _charset *charsets; /* Array of character sets for _SET tokens. */ + int cindex; /* Index for adding new charsets. */ + int calloc; /* Number of charsets currently allocated. */ + + /* Stuff built by the parser. */ + _token *tokens; /* Postfix parse array. */ + int tindex; /* Index for adding new tokens. */ + int talloc; /* Number of tokens currently allocated. */ + int depth; /* Depth required of an evaluation stack + used for depth-first traversal of the + parse tree. */ + int nleaves; /* Number of leaves on the parse tree. */ + int nregexps; /* Count of parallel regexps being built + with regparse(). */ + + /* Stuff owned by the state builder. */ + _dfa_state *states; /* States of the regexp. */ + int sindex; /* Index for adding new states. */ + int salloc; /* Number of states currently allocated. */ + + /* Stuff built by the structure analyzer. */ + _position_set *follows; /* Array of follow sets, indexed by position + index. 
The follow of a position is the set + of positions containing characters that + could conceivably follow a character + matching the given position in a string + matching the regexp. Allocated to the + maximum possible position index. */ + int searchflag; /* True if we are supposed to build a searching + as opposed to an exact matcher. A searching + matcher finds the first and shortest string + matching a regexp anywhere in the buffer, + whereas an exact matcher finds the longest + string matching, but anchored to the + beginning of the buffer. */ + + /* Stuff owned by the executor. */ + int tralloc; /* Number of transition tables that have + slots so far. */ + int trcount; /* Number of transition tables that have + actually been built. */ + int **trans; /* Transition tables for states that can + never accept. If the transitions for a + state have not yet been computed, or the + state could possibly accept, its entry in + this table is NULL. */ + int **realtrans; /* Trans always points to realtrans + 1; this + is so trans[-1] can contain NULL. */ + int **fails; /* Transition tables after failing to accept + on a state that potentially could do so. */ + int *success; /* Table of acceptance conditions used in + regexecute and computed in build_state. */ + int *newlines; /* Transitions on newlines. The entry for a + newline in any transition table is always + -1 so we can count lines without wasting + too many cycles. The transition for a + newline is stored separately and handled + as a special case. Newline is also used + as a sentinel at the end of the buffer. */ + char must[MUST_MAX]; + int mustn; +}; + +/* Some macros for user access to regexp internals. */ + +/* ACCEPTING returns true if s could possibly be an accepting state of r. */ +#define ACCEPTING(s, r) ((r).states[s].constraint) + +/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the + specified context. 
*/ +#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \ + _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \ + prevn, currn, prevl, currl) + +/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel + regexps that a given state could accept. Parallel regexps are numbered + starting at 1. */ +#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end) + +/* Entry points. */ + +#ifdef __STDC__ + +/* Regsyntax() takes two arguments; the first sets the syntax bits described + earlier in this file, and the second sets the case-folding flag. */ +extern void regsyntax(long, int); + +/* Compile the given string of the given length into the given struct regexp. + Final argument is a flag specifying whether to build a searching or an + exact matcher. */ +extern void regcompile(const char *, size_t, struct regexp *, int); + +/* Execute the given struct regexp on the buffer of characters. The + first char * points to the beginning, and the second points to the + first character after the end of the buffer, which must be a writable + place so a sentinel end-of-buffer marker can be stored there. The + third-to-last argument is a flag telling whether to allow newlines to + be part of a string matching the regexp. The next-to-last argument, + if non-NULL, points to a place to increment every time we see a + newline. The final argument, if non-NULL, points to a flag that will + be set if further examination by a backtracking matcher is needed in + order to verify backreferencing; otherwise the flag will be cleared. + Returns NULL if no match is found, or a pointer to the first + character after the first & shortest matching string in the buffer. */ +extern char *regexecute(struct regexp *, char *, char *, int, int *, int *); + +/* Free the storage held by the components of a struct regexp. */ +extern void reg_free(struct regexp *); + +/* Entry points for people who know what they're doing. 
*/ + +/* Initialize the components of a struct regexp. */ +extern void reginit(struct regexp *); + +/* Incrementally parse a string of given length into a struct regexp. */ +extern void regparse(const char *, size_t, struct regexp *); + +/* Analyze a parsed regexp; second argument tells whether to build a searching + or an exact matcher. */ +extern void reganalyze(struct regexp *, int); + +/* Compute, for each possible character, the transitions out of a given + state, storing them in an array of integers. */ +extern void regstate(int, struct regexp *, int []); + +/* Error handling. */ + +/* Reg_error() is called by the regexp routines whenever an error occurs. It + takes a single argument, a NUL-terminated string describing the error. + The default reg_error() prints the error message to stderr and exits. + The user can provide a different reg_error() if so desired. */ +extern void reg_error(const char *); + +#else /* ! __STDC__ */ +extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse(); +extern void reganalyze(), regstate(), reg_error(); +extern char *regexecute(); +#endif diff --git a/gnu/usr.bin/awk/eval.c b/gnu/usr.bin/awk/eval.c new file mode 100644 index 0000000..f640f37 --- /dev/null +++ b/gnu/usr.bin/awk/eval.c @@ -0,0 +1,1225 @@ +/* + * eval.c - gawk parse tree interpreter + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +extern double pow P((double x, double y)); +extern double modf P((double x, double *yp)); +extern double fmod P((double x, double y)); + +static int eval_condition P((NODE *tree)); +static NODE *op_assign P((NODE *tree)); +static NODE *func_call P((NODE *name, NODE *arg_list)); +static NODE *match_op P((NODE *tree)); + +NODE *_t; /* used as a temporary in macros */ +#ifdef MSDOS +double _msc51bug; /* to get around a bug in MSC 5.1 */ +#endif +NODE *ret_node; +int OFSlen; +int ORSlen; +int OFMTidx; +int CONVFMTidx; + +/* Macros and variables to save and restore function and loop bindings */ +/* + * the val variable allows return/continue/break-out-of-context to be + * caught and diagnosed + */ +#define PUSH_BINDING(stack, x, val) (memcpy ((char *)(stack), (char *)(x), sizeof (jmp_buf)), val++) +#define RESTORE_BINDING(stack, x, val) (memcpy ((char *)(x), (char *)(stack), sizeof (jmp_buf)), val--) + +static jmp_buf loop_tag; /* always the current binding */ +static int loop_tag_valid = 0; /* nonzero when loop_tag valid */ +static int func_tag_valid = 0; +static jmp_buf func_tag; +extern int exiting, exit_val; + +/* + * This table is used by the regexp routines to do case independent + * matching. Basically, every ascii character maps to itself, except + * uppercase letters map to lower case ones. This table has 256 + * entries, which may be overkill. Note also that if the system this + * is compiled on doesn't use 7-bit ascii, casetable[] should not be + * defined to the linker, so gawk should not load. + * + * Do NOT make this array static, it is used in several spots, not + * just in this file. 
+ */ +#if 'a' == 97 /* it's ascii */ +char casetable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + /* ' ' '!' '"' '#' '$' '%' '&' ''' */ + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + /* '(' ')' '*' '+' ',' '-' '.' '/' */ + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + /* '0' '1' '2' '3' '4' '5' '6' '7' */ + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + /* '8' '9' ':' ';' '<' '=' '>' '?' */ + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */ + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */ + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'x' 'y' 'z' '{' '|' '}' '~' */ + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', 
'\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#else +#include "You lose. You will need a translation table for your character set." +#endif + +/* + * Tree is a bunch of rules to run. Returns zero if it hit an exit() + * statement + */ +int +interpret(tree) +register NODE *volatile tree; +{ + jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */ + static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT + * and EXIT statements. 
It is static because + * there are no nested rules */ + register NODE *volatile t = NULL; /* temporary */ + NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */ + NODE *volatile stable_tree; + int volatile traverse = 1; /* True => loop thru tree (Node_rule_list) */ + + if (tree == NULL) + return 1; + sourceline = tree->source_line; + source = tree->source_file; + switch (tree->type) { + case Node_rule_node: + traverse = 0; /* False => one for-loop iteration only */ + /* FALL THROUGH */ + case Node_rule_list: + for (t = tree; t != NULL; t = t->rnode) { + if (traverse) + tree = t->lnode; + sourceline = tree->source_line; + source = tree->source_file; + switch (setjmp(rule_tag)) { + case 0: /* normal non-jump */ + /* test pattern, if any */ + if (tree->lnode == NULL || + eval_condition(tree->lnode)) + (void) interpret(tree->rnode); + break; + case TAG_CONTINUE: /* NEXT statement */ + return 1; + case TAG_BREAK: + return 0; + default: + cant_happen(); + } + if (!traverse) /* case Node_rule_node */ + break; /* don't loop */ + } + break; + + case Node_statement_list: + for (t = tree; t != NULL; t = t->rnode) + (void) interpret(t->lnode); + break; + + case Node_K_if: + if (eval_condition(tree->lnode)) { + (void) interpret(tree->rnode->lnode); + } else { + (void) interpret(tree->rnode->rnode); + } + break; + + case Node_K_while: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + + stable_tree = tree; + while (eval_condition(stable_tree->lnode)) { + switch (setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->rnode); + break; + case TAG_CONTINUE: /* continue statement */ + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_do: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + stable_tree = tree; + do { + switch 
(setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->rnode); + break; + case TAG_CONTINUE: /* continue statement */ + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } while (eval_condition(stable_tree->lnode)); + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_for: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + (void) interpret(tree->forloop->init); + stable_tree = tree; + while (eval_condition(stable_tree->forloop->cond)) { + switch (setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->lnode); + /* fall through */ + case TAG_CONTINUE: /* continue statement */ + (void) interpret(stable_tree->forloop->incr); + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_arrayfor: + { + volatile struct search l; /* For array_for */ + Func_ptr after_assign = NULL; + +#define hakvar forloop->init +#define arrvar forloop->incr + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + lhs = get_lhs(tree->hakvar, &after_assign); + t = tree->arrvar; + if (t->type == Node_param_list) + t = stack_ptr[t->param_cnt]; + stable_tree = tree; + for (assoc_scan(t, (struct search *)&l); + l.retval; + assoc_next((struct search *)&l)) { + unref(*((NODE **) lhs)); + *lhs = dupnode(l.retval); + if (after_assign) + (*after_assign)(); + switch (setjmp(loop_tag)) { + case 0: + (void) interpret(stable_tree->lnode); + case TAG_CONTINUE: + break; + + case TAG_BREAK: + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + } + + case Node_K_break: + if (loop_tag_valid == 0) + 
fatal("unexpected break"); + longjmp(loop_tag, TAG_BREAK); + break; + + case Node_K_continue: + if (loop_tag_valid == 0) { + /* + * AT&T nawk treats continue outside of loops like + * next. Allow it if not posix, and complain if + * lint. + */ + static int warned = 0; + + if (do_lint && ! warned) { + warning("use of `continue' outside of loop is not portable"); + warned = 1; + } + if (do_posix) + fatal("use of `continue' outside of loop is not allowed"); + longjmp(rule_tag, TAG_CONTINUE); + } else + longjmp(loop_tag, TAG_CONTINUE); + break; + + case Node_K_print: + do_print(tree); + break; + + case Node_K_printf: + do_printf(tree); + break; + + case Node_K_delete: + do_delete(tree->lnode, tree->rnode); + break; + + case Node_K_next: + longjmp(rule_tag, TAG_CONTINUE); + break; + + case Node_K_nextfile: + do_nextfile(); + break; + + case Node_K_exit: + /* + * In A,K,&W, p. 49, it says that an exit statement "... + * causes the program to behave as if the end of input had + * occurred; no more input is read, and the END actions, if + * any are executed." This implies that the rest of the rules + * are not done. So we immediately break out of the main loop. + */ + exiting = 1; + if (tree) { + t = tree_eval(tree->lnode); + exit_val = (int) force_number(t); + } + free_temp(t); + longjmp(rule_tag, TAG_BREAK); + break; + + case Node_K_return: + t = tree_eval(tree->lnode); + ret_node = dupnode(t); + free_temp(t); + longjmp(func_tag, TAG_RETURN); + break; + + default: + /* + * Appears to be an expression statement. Throw away the + * value. 
+ */ + if (do_lint && tree->type == Node_var) + warning("statement has no effect"); + t = tree_eval(tree); + free_temp(t); + break; + } + return 1; +} + +/* evaluate a subtree */ + +NODE * +r_tree_eval(tree) +register NODE *tree; +{ + register NODE *r, *t1, *t2; /* return value & temporary subtrees */ + register NODE **lhs; + register int di; + AWKNUM x, x1, x2; + long lx; +#ifdef CRAY + long lx2; +#endif + +#ifdef DEBUG + if (tree == NULL) + return Nnull_string; + if (tree->type == Node_val) { + if (tree->stref <= 0) cant_happen(); + return tree; + } + if (tree->type == Node_var) { + if (tree->var_value->stref <= 0) cant_happen(); + return tree->var_value; + } + if (tree->type == Node_param_list) { + if (stack_ptr[tree->param_cnt] == NULL) + return Nnull_string; + else + return stack_ptr[tree->param_cnt]->var_value; + } +#endif + switch (tree->type) { + case Node_and: + return tmp_number((AWKNUM) (eval_condition(tree->lnode) + && eval_condition(tree->rnode))); + + case Node_or: + return tmp_number((AWKNUM) (eval_condition(tree->lnode) + || eval_condition(tree->rnode))); + + case Node_not: + return tmp_number((AWKNUM) ! 
eval_condition(tree->lnode)); + + /* Builtins */ + case Node_builtin: + return ((*tree->proc) (tree->subnode)); + + case Node_K_getline: + return (do_getline(tree)); + + case Node_in_array: + return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode)); + + case Node_func_call: + return func_call(tree->rnode, tree->lnode); + + /* unary operations */ + case Node_NR: + case Node_FNR: + case Node_NF: + case Node_FIELDWIDTHS: + case Node_FS: + case Node_RS: + case Node_field_spec: + case Node_subscript: + case Node_IGNORECASE: + case Node_OFS: + case Node_ORS: + case Node_OFMT: + case Node_CONVFMT: + lhs = get_lhs(tree, (Func_ptr *)0); + return *lhs; + + case Node_var_array: + fatal("attempt to use an array in a scalar context"); + + case Node_unary_minus: + t1 = tree_eval(tree->subnode); + x = -force_number(t1); + free_temp(t1); + return tmp_number(x); + + case Node_cond_exp: + if (eval_condition(tree->lnode)) + return tree_eval(tree->rnode->lnode); + return tree_eval(tree->rnode->rnode); + + case Node_match: + case Node_nomatch: + case Node_regex: + return match_op(tree); + + case Node_func: + fatal("function `%s' called with space between name and (,\n%s", + tree->lnode->param, + "or used in other expression context"); + + /* assignments */ + case Node_assign: + { + Func_ptr after_assign = NULL; + + r = tree_eval(tree->rnode); + lhs = get_lhs(tree->lnode, &after_assign); + if (r != *lhs) { + NODE *save; + + save = *lhs; + *lhs = dupnode(r); + unref(save); + } + free_temp(r); + if (after_assign) + (*after_assign)(); + return *lhs; + } + + case Node_concat: + { +#define STACKSIZE 10 + NODE *stack[STACKSIZE]; + register NODE **sp; + register int len; + char *str; + register char *dest; + + sp = stack; + len = 0; + while (tree->type == Node_concat) { + *sp = force_string(tree_eval(tree->lnode)); + tree = tree->rnode; + len += (*sp)->stlen; + if (++sp == &stack[STACKSIZE-2]) /* one more and NULL */ + break; + } + *sp = force_string(tree_eval(tree)); + len += 
(*sp)->stlen; + *++sp = NULL; + emalloc(str, char *, len+2, "tree_eval"); + dest = str; + sp = stack; + while (*sp) { + memcpy(dest, (*sp)->stptr, (*sp)->stlen); + dest += (*sp)->stlen; + free_temp(*sp); + sp++; + } + r = make_str_node(str, len, ALREADY_MALLOCED); + r->flags |= TEMP; + } + return r; + + /* other assignment types are easier because they are numeric */ + case Node_preincrement: + case Node_predecrement: + case Node_postincrement: + case Node_postdecrement: + case Node_assign_exp: + case Node_assign_times: + case Node_assign_quotient: + case Node_assign_mod: + case Node_assign_plus: + case Node_assign_minus: + return op_assign(tree); + default: + break; /* handled below */ + } + + /* evaluate subtrees in order to do binary operation, then keep going */ + t1 = tree_eval(tree->lnode); + t2 = tree_eval(tree->rnode); + + switch (tree->type) { + case Node_geq: + case Node_leq: + case Node_greater: + case Node_less: + case Node_notequal: + case Node_equal: + di = cmp_nodes(t1, t2); + free_temp(t1); + free_temp(t2); + switch (tree->type) { + case Node_equal: + return tmp_number((AWKNUM) (di == 0)); + case Node_notequal: + return tmp_number((AWKNUM) (di != 0)); + case Node_less: + return tmp_number((AWKNUM) (di < 0)); + case Node_greater: + return tmp_number((AWKNUM) (di > 0)); + case Node_leq: + return tmp_number((AWKNUM) (di <= 0)); + case Node_geq: + return tmp_number((AWKNUM) (di >= 0)); + default: + cant_happen(); + } + break; + default: + break; /* handled below */ + } + + x1 = force_number(t1); + free_temp(t1); + x2 = force_number(t2); + free_temp(t2); + switch (tree->type) { + case Node_exp: + if ((lx = x2) == x2 && lx >= 0) { /* integer exponent */ + if (lx == 0) + x = 1; + else if (lx == 1) + x = x1; + else { + /* doing it this way should be more precise */ + for (x = x1; --lx; ) + x *= x1; + } + } else + x = pow((double) x1, (double) x2); + return tmp_number(x); + + case Node_times: + return tmp_number(x1 * x2); + + case Node_quotient: + if (x2 == 
0) + fatal("division by zero attempted"); +#ifdef _CRAY + /* + * special case for integer division, put in for Cray + */ + lx2 = x2; + if (lx2 == 0) + return tmp_number(x1 / x2); + lx = (long) x1 / lx2; + if (lx * x2 == x1) + return tmp_number((AWKNUM) lx); + else +#endif + return tmp_number(x1 / x2); + + case Node_mod: + if (x2 == 0) + fatal("division by zero attempted in mod"); +#ifndef FMOD_MISSING + return tmp_number(fmod (x1, x2)); +#else + (void) modf(x1 / x2, &x); + return tmp_number(x1 - x * x2); +#endif + + case Node_plus: + return tmp_number(x1 + x2); + + case Node_minus: + return tmp_number(x1 - x2); + + case Node_var_array: + fatal("attempt to use an array in a scalar context"); + + default: + fatal("illegal type (%d) in tree_eval", tree->type); + } + return 0; +} + +/* Is TREE true or false? Returns 0==false, non-zero==true */ +static int +eval_condition(tree) +register NODE *tree; +{ + register NODE *t1; + register int ret; + + if (tree == NULL) /* Null trees are the easiest kinds */ + return 1; + if (tree->type == Node_line_range) { + /* + * Node_line_range is kind of like Node_match, EXCEPT: the + * lnode field (more properly, the condpair field) is a node + * of a Node_cond_pair; whether we evaluate the lnode of that + * node or the rnode depends on the triggered word. More + * precisely: if we are not yet triggered, we tree_eval the + * lnode; if that returns true, we set the triggered word. + * If we are triggered (not ELSE IF, note), we tree_eval the + * rnode, clear triggered if it succeeds, and perform our + * action (regardless of success or failure). We want to be + * able to begin and end on a single input record, so this + * isn't an ELSE IF, as noted above. + */ + if (!tree->triggered) + if (!eval_condition(tree->condpair->lnode)) + return 0; + else + tree->triggered = 1; + /* Else we are triggered */ + if (eval_condition(tree->condpair->rnode)) + tree->triggered = 0; + return 1; + } + + /* + * Could just be J.random expression. 
in which case, null and 0 are + * false, anything else is true + */ + + t1 = tree_eval(tree); + if (t1->flags & MAYBE_NUM) + (void) force_number(t1); + if (t1->flags & NUMBER) + ret = t1->numbr != 0.0; + else + ret = t1->stlen != 0; + free_temp(t1); + return ret; +} + +/* + * compare two nodes, returning negative, 0, positive + */ +int +cmp_nodes(t1, t2) +register NODE *t1, *t2; +{ + register int ret; + register int len1, len2; + + if (t1 == t2) + return 0; + if (t1->flags & MAYBE_NUM) + (void) force_number(t1); + if (t2->flags & MAYBE_NUM) + (void) force_number(t2); + if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) { + if (t1->numbr == t2->numbr) return 0; + else if (t1->numbr - t2->numbr < 0) return -1; + else return 1; + } + (void) force_string(t1); + (void) force_string(t2); + len1 = t1->stlen; + len2 = t2->stlen; + if (len1 == 0 || len2 == 0) + return len1 - len2; + ret = memcmp(t1->stptr, t2->stptr, len1 <= len2 ? len1 : len2); + return ret == 0 ? len1-len2 : ret; +} + +static NODE * +op_assign(tree) +register NODE *tree; +{ + AWKNUM rval, lval; + NODE **lhs; + AWKNUM t1, t2; + long ltemp; + NODE *tmp; + Func_ptr after_assign = NULL; + + lhs = get_lhs(tree->lnode, &after_assign); + lval = force_number(*lhs); + + /* + * Can't unref *lhs until we know the type; doing so + * too early breaks x += x sorts of things. + */ + switch(tree->type) { + case Node_preincrement: + case Node_predecrement: + unref(*lhs); + *lhs = make_number(lval + + (tree->type == Node_preincrement ? 1.0 : -1.0)); + if (after_assign) + (*after_assign)(); + return *lhs; + + case Node_postincrement: + case Node_postdecrement: + unref(*lhs); + *lhs = make_number(lval + + (tree->type == Node_postincrement ? 
1.0 : -1.0)); + if (after_assign) + (*after_assign)(); + return tmp_number(lval); + default: + break; /* handled below */ + } + + tmp = tree_eval(tree->rnode); + rval = force_number(tmp); + free_temp(tmp); + unref(*lhs); + switch(tree->type) { + case Node_assign_exp: + if ((ltemp = rval) == rval) { /* integer exponent */ + if (ltemp == 0) + *lhs = make_number((AWKNUM) 1); + else if (ltemp == 1) + *lhs = make_number(lval); + else { + /* doing it this way should be more precise */ + for (t1 = t2 = lval; --ltemp; ) + t1 *= t2; + *lhs = make_number(t1); + } + } else + *lhs = make_number((AWKNUM) pow((double) lval, (double) rval)); + break; + + case Node_assign_times: + *lhs = make_number(lval * rval); + break; + + case Node_assign_quotient: + if (rval == (AWKNUM) 0) + fatal("division by zero attempted in /="); +#ifdef _CRAY + /* + * special case for integer division, put in for Cray + */ + ltemp = rval; + if (ltemp == 0) { + *lhs = make_number(lval / rval); + break; + } + ltemp = (long) lval / ltemp; + if (ltemp * lval == rval) + *lhs = make_number((AWKNUM) ltemp); + else +#endif + *lhs = make_number(lval / rval); + break; + + case Node_assign_mod: + if (rval == (AWKNUM) 0) + fatal("division by zero attempted in %="); +#ifndef FMOD_MISSING + *lhs = make_number(fmod(lval, rval)); +#else + (void) modf(lval / rval, &t1); + t2 = lval - rval * t1; + *lhs = make_number(t2); +#endif + break; + + case Node_assign_plus: + *lhs = make_number(lval + rval); + break; + + case Node_assign_minus: + *lhs = make_number(lval - rval); + break; + default: + cant_happen(); + } + if (after_assign) + (*after_assign)(); + return *lhs; +} + +NODE **stack_ptr; + +static NODE * +func_call(name, arg_list) +NODE *name; /* name is a Node_val giving function name */ +NODE *arg_list; /* Node_expression_list of calling args. 
*/ +{ + register NODE *arg, *argp, *r; + NODE *n, *f; + jmp_buf volatile func_tag_stack; + jmp_buf volatile loop_tag_stack; + int volatile save_loop_tag_valid = 0; + NODE **volatile save_stack, *save_ret_node; + NODE **volatile local_stack = NULL, **sp; + int count; + extern NODE *ret_node; + + /* + * retrieve function definition node + */ + f = lookup(name->stptr); + if (!f || f->type != Node_func) + fatal("function `%s' not defined", name->stptr); +#ifdef FUNC_TRACE + fprintf(stderr, "function %s called\n", name->stptr); +#endif + count = f->lnode->param_cnt; + if (count) + emalloc(local_stack, NODE **, count*sizeof(NODE *), "func_call"); + sp = local_stack; + + /* + * for each calling arg. add NODE * on stack + */ + for (argp = arg_list; count && argp != NULL; argp = argp->rnode) { + arg = argp->lnode; + getnode(r); + r->type = Node_var; + /* + * call by reference for arrays; see below also + */ + if (arg->type == Node_param_list) + arg = stack_ptr[arg->param_cnt]; + if (arg->type == Node_var_array) + *r = *arg; + else { + n = tree_eval(arg); + r->lnode = dupnode(n); + r->rnode = (NODE *) NULL; + free_temp(n); + } + *sp++ = r; + count--; + } + if (argp != NULL) /* left over calling args. */ + warning( + "function `%s' called with more arguments than declared", + name->stptr); + /* + * add remaining params. on stack with null value + */ + while (count-- > 0) { + getnode(r); + r->type = Node_var; + r->lnode = Nnull_string; + r->rnode = (NODE *) NULL; + *sp++ = r; + } + + /* + * Execute function body, saving context, as a return statement + * will longjmp back here. + * + * Have to save and restore the loop_tag stuff so that a return + * inside a loop in a function body doesn't scrog any loops going + * on in the main program. We save the necessary info in variables + * local to this function so that function nesting works OK. + * We also only bother to save the loop stuff if we're in a loop + * when the function is called. 
+ */ + if (loop_tag_valid) { + int junk = 0; + + save_loop_tag_valid = (volatile int) loop_tag_valid; + PUSH_BINDING(loop_tag_stack, loop_tag, junk); + loop_tag_valid = 0; + } + save_stack = stack_ptr; + stack_ptr = local_stack; + PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid); + save_ret_node = ret_node; + ret_node = Nnull_string; /* default return value */ + if (setjmp(func_tag) == 0) + (void) interpret(f->rnode); + + r = ret_node; + ret_node = (NODE *) save_ret_node; + RESTORE_BINDING(func_tag_stack, func_tag, func_tag_valid); + stack_ptr = (NODE **) save_stack; + + /* + * here, we pop each parameter and check whether + * it was an array. If so, and if the arg. passed in was + * a simple variable, then the value should be copied back. + * This achieves "call-by-reference" for arrays. + */ + sp = local_stack; + count = f->lnode->param_cnt; + for (argp = arg_list; count > 0 && argp != NULL; argp = argp->rnode) { + arg = argp->lnode; + if (arg->type == Node_param_list) + arg = stack_ptr[arg->param_cnt]; + n = *sp++; + if (arg->type == Node_var && n->type == Node_var_array) { + /* should we free arg->var_value ? */ + arg->var_array = n->var_array; + arg->type = Node_var_array; + } + unref(n->lnode); + freenode(n); + count--; + } + while (count-- > 0) { + n = *sp++; + /* if n is an (local) array, all the elements should be freed */ + if (n->type == Node_var_array) { + assoc_clear(n); + free(n->var_array); + } + unref(n->lnode); + freenode(n); + } + if (local_stack) + free((char *) local_stack); + + /* Restore the loop_tag stuff if necessary. */ + if (save_loop_tag_valid) { + int junk = 0; + + loop_tag_valid = (int) save_loop_tag_valid; + RESTORE_BINDING(loop_tag_stack, loop_tag, junk); + } + + if (!(r->flags & PERM)) + r->flags |= TEMP; + return r; +} + +/* + * This returns a POINTER to a node pointer. 
get_lhs(ptr) is the current + * value of the var, or where to store the var's new value + */ + +NODE ** +get_lhs(ptr, assign) +register NODE *ptr; +Func_ptr *assign; +{ + register NODE **aptr = NULL; + register NODE *n; + + switch (ptr->type) { + case Node_var_array: + fatal("attempt to use an array in a scalar context"); + case Node_var: + aptr = &(ptr->var_value); +#ifdef DEBUG + if (ptr->var_value->stref <= 0) + cant_happen(); +#endif + break; + + case Node_FIELDWIDTHS: + aptr = &(FIELDWIDTHS_node->var_value); + if (assign) + *assign = set_FIELDWIDTHS; + break; + + case Node_RS: + aptr = &(RS_node->var_value); + if (assign) + *assign = set_RS; + break; + + case Node_FS: + aptr = &(FS_node->var_value); + if (assign) + *assign = set_FS; + break; + + case Node_FNR: + unref(FNR_node->var_value); + FNR_node->var_value = make_number((AWKNUM) FNR); + aptr = &(FNR_node->var_value); + if (assign) + *assign = set_FNR; + break; + + case Node_NR: + unref(NR_node->var_value); + NR_node->var_value = make_number((AWKNUM) NR); + aptr = &(NR_node->var_value); + if (assign) + *assign = set_NR; + break; + + case Node_NF: + if (NF == -1) + (void) get_field(HUGE-1, assign); /* parse record */ + unref(NF_node->var_value); + NF_node->var_value = make_number((AWKNUM) NF); + aptr = &(NF_node->var_value); + if (assign) + *assign = set_NF; + break; + + case Node_IGNORECASE: + unref(IGNORECASE_node->var_value); + IGNORECASE_node->var_value = make_number((AWKNUM) IGNORECASE); + aptr = &(IGNORECASE_node->var_value); + if (assign) + *assign = set_IGNORECASE; + break; + + case Node_OFMT: + aptr = &(OFMT_node->var_value); + if (assign) + *assign = set_OFMT; + break; + + case Node_CONVFMT: + aptr = &(CONVFMT_node->var_value); + if (assign) + *assign = set_CONVFMT; + break; + + case Node_ORS: + aptr = &(ORS_node->var_value); + if (assign) + *assign = set_ORS; + break; + + case Node_OFS: + aptr = &(OFS_node->var_value); + if (assign) + *assign = set_OFS; + break; + + case Node_param_list: + aptr = 
&(stack_ptr[ptr->param_cnt]->var_value); + break; + + case Node_field_spec: + { + int field_num; + + n = tree_eval(ptr->lnode); + field_num = (int) force_number(n); + free_temp(n); + if (field_num < 0) + fatal("attempt to access field %d", field_num); + if (field_num == 0 && field0_valid) { /* short circuit */ + aptr = &fields_arr[0]; + if (assign) + *assign = reset_record; + break; + } + aptr = get_field(field_num, assign); + break; + } + case Node_subscript: + n = ptr->lnode; + if (n->type == Node_param_list) + n = stack_ptr[n->param_cnt]; + aptr = assoc_lookup(n, concat_exp(ptr->rnode)); + break; + + case Node_func: + fatal ("`%s' is a function, assignment is not allowed", + ptr->lnode->param); + default: + cant_happen(); + } + return aptr; +} + +static NODE * +match_op(tree) +register NODE *tree; +{ + register NODE *t1; + register Regexp *rp; + int i; + int match = 1; + + if (tree->type == Node_nomatch) + match = 0; + if (tree->type == Node_regex) + t1 = *get_field(0, (Func_ptr *) 0); + else { + t1 = force_string(tree_eval(tree->lnode)); + tree = tree->rnode; + } + rp = re_update(tree); + i = research(rp, t1->stptr, 0, t1->stlen, 0); + i = (i == -1) ^ (match == 1); + free_temp(t1); + return tmp_number((AWKNUM) i); +} + +void +set_IGNORECASE() +{ + static int warned = 0; + + if ((do_lint || do_unix) && ! 
warned) { + warned = 1; + warning("IGNORECASE not supported in compatibility mode"); + } + IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); + set_FS(); +} + +void +set_OFS() +{ + OFS = force_string(OFS_node->var_value)->stptr; + OFSlen = OFS_node->var_value->stlen; + OFS[OFSlen] = '\0'; +} + +void +set_ORS() +{ + ORS = force_string(ORS_node->var_value)->stptr; + ORSlen = ORS_node->var_value->stlen; + ORS[ORSlen] = '\0'; +} + +static NODE **fmt_list = NULL; +static int fmt_ok P((NODE *n)); +static int fmt_index P((NODE *n)); + +static int +fmt_ok(n) +NODE *n; +{ + /* to be done later */ + return 1; +} + +static int +fmt_index(n) +NODE *n; +{ + register int ix = 0; + static int fmt_num = 4; + static int fmt_hiwater = 0; + + if (fmt_list == NULL) + emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index"); + (void) force_string(n); + while (ix < fmt_hiwater) { + if (cmp_nodes(fmt_list[ix], n) == 0) + return ix; + ix++; + } + /* not found */ + n->stptr[n->stlen] = '\0'; + if (!fmt_ok(n)) + warning("bad FMT specification"); + if (fmt_hiwater >= fmt_num) { + fmt_num *= 2; + emalloc(fmt_list, NODE **, fmt_num, "fmt_index"); + } + fmt_list[fmt_hiwater] = dupnode(n); + return fmt_hiwater++; +} + +void +set_OFMT() +{ + OFMTidx = fmt_index(OFMT_node->var_value); + OFMT = fmt_list[OFMTidx]->stptr; +} + +void +set_CONVFMT() +{ + CONVFMTidx = fmt_index(CONVFMT_node->var_value); + CONVFMT = fmt_list[CONVFMTidx]->stptr; +} diff --git a/gnu/usr.bin/awk/field.c b/gnu/usr.bin/awk/field.c new file mode 100644 index 0000000..d8f9a54 --- /dev/null +++ b/gnu/usr.bin/awk/field.c @@ -0,0 +1,645 @@ +/* + * field.c - routines for dealing with fields and record parsing + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +static int (*parse_field) P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); +static void rebuild_record P((void)); +static int re_parse_field P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); +static int def_parse_field P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); +static int sc_parse_field P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); +static int fw_parse_field P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); +static void set_element P((int, char *, int, NODE *)); +static void grow_fields_arr P((int num)); +static void set_field P((int num, char *str, int len, NODE *dummy)); + + +static Regexp *FS_regexp = NULL; +static char *parse_extent; /* marks where to restart parse of record */ +static int parse_high_water=0; /* field number that we have parsed so far */ +static int nf_high_water = 0; /* size of fields_arr */ +static int resave_fs; +static NODE *save_FS; /* save current value of FS when line is read, + * to be used in deferred parsing + */ + +NODE **fields_arr; /* array of pointers to the field nodes */ +int field0_valid; /* $(>0) has not been changed yet */ +int default_FS; +static NODE **nodes; /* permanent repository of field nodes */ +static int 
*FIELDWIDTHS = NULL; + +void +init_fields() +{ + NODE *n; + + emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); + emalloc(nodes, NODE **, sizeof(NODE *), "init_fields"); + getnode(n); + *n = *Nnull_string; + fields_arr[0] = nodes[0] = n; + parse_extent = fields_arr[0]->stptr; + save_FS = dupnode(FS_node->var_value); + field0_valid = 1; +} + + +static void +grow_fields_arr(num) +int num; +{ + register int t; + register NODE *n; + + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); + erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field"); + for (t = nf_high_water+1; t <= num; t++) { + getnode(n); + *n = *Nnull_string; + fields_arr[t] = nodes[t] = n; + } + nf_high_water = num; +} + +/*ARGSUSED*/ +static void +set_field(num, str, len, dummy) +int num; +char *str; +int len; +NODE *dummy; /* not used -- just to make interface same as set_element */ +{ + register NODE *n; + + if (num > nf_high_water) + grow_fields_arr(num); + n = nodes[num]; + n->stptr = str; + n->stlen = len; + n->flags = (PERM|STR|STRING|MAYBE_NUM); + fields_arr[num] = n; +} + +/* Someone assigned a value to $(something). 
Fix up $0 to be right */
static void
rebuild_record()
{
	register int total;	/* length of the rebuilt record */
	register int i;
	register NODE *fld;
	register char *out;
	NODE *sep;
	char *rec;
	int seplen;

	sep = force_string(OFS_node->var_value);
	seplen = sep->stlen;

	/* pass 1: stringify fields NF down to 1 and add up their lengths */
	total = 0;
	for (i = NF; i > 0; i--) {
		fld = force_string(fields_arr[i]);
		total += fld->stlen;
	}
	total += (NF - 1) * seplen;
	if (total < 0)
		total = 0;

	emalloc(rec, char *, total + 2, "fix_fields");
	rec[0] = '\0';
	out = rec;

	/* pass 2: lay the fields down, with OFS between neighbours */
	for (i = 1; i <= NF; i++) {
		fld = fields_arr[i];
		switch (fld->stlen) {
		case 0:
			break;
		case 1:
			*out++ = fld->stptr[0];
			break;
		default:
			memcpy(out, fld->stptr, fld->stlen);
			out += fld->stlen;
			break;
		}
		if (i == NF)
			continue;	/* no separator after the last field */
		switch (seplen) {
		case 0:
			break;
		case 1:
			*out++ = sep->stptr[0];
			break;
		default:
			memcpy(out, sep->stptr, seplen);
			out += seplen;
			break;
		}
	}

	/* swap the new text in as $0 */
	fld = make_str_node(rec, total, ALREADY_MALLOCED);
	unref(fields_arr[0]);
	fields_arr[0] = fld;
	field0_valid = 1;
}

/*
 * setup $0, but defer parsing rest of line until reference is made to $(>0)
 * or to NF.  At that point, parse only as much as necessary.
+ */ +void +set_record(buf, cnt, freeold) +char *buf; +int cnt; +int freeold; +{ + register int i; + + NF = -1; + for (i = 1; i <= parse_high_water; i++) { + unref(fields_arr[i]); + } + parse_high_water = 0; + if (freeold) { + unref(fields_arr[0]); + if (resave_fs) { + resave_fs = 0; + unref(save_FS); + save_FS = dupnode(FS_node->var_value); + } + nodes[0]->stptr = buf; + nodes[0]->stlen = cnt; + nodes[0]->stref = 1; + nodes[0]->flags = (STRING|STR|PERM|MAYBE_NUM); + fields_arr[0] = nodes[0]; + } + fields_arr[0]->flags |= MAYBE_NUM; + field0_valid = 1; +} + +void +reset_record() +{ + (void) force_string(fields_arr[0]); + set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0); +} + +void +set_NF() +{ + register int i; + + NF = (int) force_number(NF_node->var_value); + if (NF > nf_high_water) + grow_fields_arr(NF); + for (i = parse_high_water + 1; i <= NF; i++) { + unref(fields_arr[i]); + fields_arr[i] = Nnull_string; + } + field0_valid = 0; +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). 
This variation is for when FS is a regular + * expression -- either user-defined or because RS=="" and FS==" " + */ +static int +re_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + if (*RS == 0 && default_FS) + while (scan < end && isspace(*scan)) + scan++; + field = scan; + while (scan < end + && research(rp, scan, 0, (int)(end - scan), 1) != -1 + && nf < up_to) { + if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ + scan++; + if (scan == end) { + (*set)(++nf, field, scan - field, n); + up_to = nf; + break; + } + continue; + } + (*set)(++nf, field, scan + RESTART(rp, scan) - field, n); + scan += REEND(rp, scan); + field = scan; + if (scan == end) /* FS at end of record */ + (*set)(++nf, field, 0, n); + } + if (nf != up_to && scan < end) { + (*set)(++nf, scan, (int)(end - scan), n); + scan = end; + } + *buf = scan; + return (nf); +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single space + * character. 
+ */ +static int +def_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + char sav; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + /* before doing anything save the char at *end */ + sav = *end; + /* because it will be destroyed now: */ + + *end = ' '; /* sentinel character */ + for (; nf < up_to; scan++) { + /* + * special case: fs is single space, strip leading whitespace + */ + while (scan < end && (*scan == ' ' || *scan == '\t')) + scan++; + if (scan >= end) + break; + field = scan; + while (*scan != ' ' && *scan != '\t') + scan++; + (*set)(++nf, field, (int)(scan - field), n); + if (scan == end) + break; + } + + /* everything done, restore original char at *end */ + *end = sav; + + *buf = scan; + return nf; +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single character + * other than space. 
+ */ +static int +sc_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register char fschar; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + char sav; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + if (*RS == 0 && fs->stlen == 0) + fschar = '\n'; + else + fschar = fs->stptr[0]; + + /* before doing anything save the char at *end */ + sav = *end; + /* because it will be destroyed now: */ + *end = fschar; /* sentinel character */ + + for (; nf < up_to; scan++) { + field = scan; + while (*scan++ != fschar) + ; + scan--; + (*set)(++nf, field, (int)(scan - field), n); + if (scan == end) + break; + } + + /* everything done, restore original char at *end */ + *end = sav; + + *buf = scan; + return nf; +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for fields are fixed widths. 
+ */ +static int +fw_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { + if (len > end - scan) + len = end - scan; + (*set)(++nf, scan, len, n); + scan += len; + } + if (len == -1) + *buf = end; + else + *buf = scan; + return nf; +} + +NODE ** +get_field(requested, assign) +register int requested; +Func_ptr *assign; /* this field is on the LHS of an assign */ +{ + /* + * if requesting whole line but some other field has been altered, + * then the whole line must be rebuilt + */ + if (requested == 0) { + if (!field0_valid) { + /* first, parse remainder of input record */ + if (NF == -1) { + NF = (*parse_field)(HUGE-1, &parse_extent, + fields_arr[0]->stlen - + (parse_extent - fields_arr[0]->stptr), + save_FS, FS_regexp, set_field, + (NODE *)NULL); + parse_high_water = NF; + } + rebuild_record(); + } + if (assign) + *assign = reset_record; + return &fields_arr[0]; + } + + /* assert(requested > 0); */ + + if (assign) + field0_valid = 0; /* $0 needs reconstruction */ + + if (requested <= parse_high_water) /* already parsed this field */ + return &fields_arr[requested]; + + if (NF == -1) { /* have not yet parsed to end of record */ + /* + * parse up to requested fields, calling set_field() for each, + * saving in parse_extent the point where the parse left off + */ + if (parse_high_water == 0) /* starting at the beginning */ + parse_extent = fields_arr[0]->stptr; + parse_high_water = (*parse_field)(requested, &parse_extent, + fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr), + save_FS, FS_regexp, 
set_field, (NODE *)NULL); + + /* + * if we reached the end of the record, set NF to the number of + * fields so far. Note that requested might actually refer to + * a field that is beyond the end of the record, but we won't + * set NF to that value at this point, since this is only a + * reference to the field and NF only gets set if the field + * is assigned to -- this case is handled below + */ + if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) + NF = parse_high_water; + if (requested == HUGE-1) /* HUGE-1 means set NF */ + requested = parse_high_water; + } + if (parse_high_water < requested) { /* requested beyond end of record */ + if (assign) { /* expand record */ + register int i; + + if (requested > nf_high_water) + grow_fields_arr(requested); + + /* fill in fields that don't exist */ + for (i = parse_high_water + 1; i <= requested; i++) + fields_arr[i] = Nnull_string; + + NF = requested; + parse_high_water = requested; + } else + return &Nnull_string; + } + + return &fields_arr[requested]; +} + +static void +set_element(num, s, len, n) +int num; +char *s; +int len; +NODE *n; +{ + register NODE *it; + + it = make_string(s, len); + it->flags |= MAYBE_NUM; + *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it; +} + +NODE * +do_split(tree) +NODE *tree; +{ + NODE *t1, *t2, *t3, *tmp; + NODE *fs; + char *s; + int (*parseit)P((int, char **, int, NODE *, + Regexp *, void (*)(), NODE *)); + Regexp *rp = NULL; + + t1 = tree_eval(tree->lnode); + t2 = tree->rnode->lnode; + t3 = tree->rnode->rnode->lnode; + + (void) force_string(t1); + + if (t2->type == Node_param_list) + t2 = stack_ptr[t2->param_cnt]; + if (t2->type != Node_var && t2->type != Node_var_array) + fatal("second argument of split is not a variable"); + assoc_clear(t2); + + if (t3->re_flags & FS_DFLT) { + parseit = parse_field; + fs = force_string(FS_node->var_value); + rp = FS_regexp; + } else { + tmp = force_string(tree_eval(t3->re_exp)); + if (tmp->stlen == 1) { + if (tmp->stptr[0] == ' ') + 
parseit = def_parse_field; + else + parseit = sc_parse_field; + } else { + parseit = re_parse_field; + rp = re_update(t3); + } + fs = tmp; + } + + s = t1->stptr; + tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen, + fs, rp, set_element, t2)); + free_temp(t1); + free_temp(t3); + return tmp; +} + +void +set_FS() +{ + NODE *tmp = NULL; + char buf[10]; + NODE *fs; + + buf[0] = '\0'; + default_FS = 0; + if (FS_regexp) { + refree(FS_regexp); + FS_regexp = NULL; + } + fs = force_string(FS_node->var_value); + if (fs->stlen > 1) + parse_field = re_parse_field; + else if (*RS == 0) { + parse_field = sc_parse_field; + if (fs->stlen == 1) { + if (fs->stptr[0] == ' ') { + default_FS = 1; + strcpy(buf, "[ \t\n]+"); + } else if (fs->stptr[0] != '\n') + sprintf(buf, "[%c\n]", fs->stptr[0]); + } + } else { + parse_field = def_parse_field; + if (fs->stptr[0] == ' ' && fs->stlen == 1) + default_FS = 1; + else if (fs->stptr[0] != ' ' && fs->stlen == 1) { + if (IGNORECASE == 0) + parse_field = sc_parse_field; + else + sprintf(buf, "[%c]", fs->stptr[0]); + } + } + if (buf[0]) { + FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, 1); + parse_field = re_parse_field; + } else if (parse_field == re_parse_field) { + FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, 1); + } else + FS_regexp = NULL; + resave_fs = 1; +} + +void +set_RS() +{ + (void) force_string(RS_node->var_value); + RS = RS_node->var_value->stptr; + set_FS(); +} + +void +set_FIELDWIDTHS() +{ + register char *scan; + char *end; + register int i; + static int fw_alloc = 1; + static int warned = 0; + extern double strtod(); + + if (do_lint && ! 
warned) { + warned = 1; + warning("use of FIELDWIDTHS is a gawk extension"); + } + if (do_unix) /* quick and dirty, does the trick */ + return; + + parse_field = fw_parse_field; + scan = force_string(FIELDWIDTHS_node->var_value)->stptr; + end = scan + 1; + if (FIELDWIDTHS == NULL) + emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + FIELDWIDTHS[0] = 0; + for (i = 1; ; i++) { + if (i >= fw_alloc) { + fw_alloc *= 2; + erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + } + FIELDWIDTHS[i] = (int) strtod(scan, &end); + if (end == scan) + break; + scan = end; + } + FIELDWIDTHS[i] = -1; +} diff --git a/gnu/usr.bin/awk/getopt.c b/gnu/usr.bin/awk/getopt.c new file mode 100644 index 0000000..bbf345c --- /dev/null +++ b/gnu/usr.bin/awk/getopt.c @@ -0,0 +1,662 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef GAWK +#include "config.h" +#endif + +#include <stdio.h> + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. 
*/ +#ifdef __GNU_LIBRARY__ +#include <stdlib.h> +#include <string.h> +#endif /* GNU C library. */ + + +#ifndef __STDC__ +#define const +#endif + +/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a + long-named option. Because this is not POSIX.2 compliant, it is + being phased out. */ +#define GETOPT_COMPAT + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = 0; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. 
+ + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +#ifdef __GNU_LIBRARY__ +#include <string.h> +#define my_index strchr +#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n)) +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. 
*/ + +char *getenv (); + +static char * +my_index (string, chr) + char *string; + int chr; +{ + while (*string) + { + if (*string == chr) + return string; + string++; + } + return 0; +} + +static void +my_bcopy (from, to, size) + char *from, *to; + int size; +{ + int i; + for (i = 0; i < size; i++) + to[i] = from[i]; +} +#endif /* GNU C library. */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *); + char **temp = (char **) malloc (nonopts_size); + + /* Interchange the two blocks of data in ARGV. */ + + my_bcopy (&argv[first_nonopt], temp, nonopts_size); + my_bcopy (&argv[last_nonopt], &argv[first_nonopt], + (optind - last_nonopt) * sizeof (char *)); + my_bcopy (temp, &argv[first_nonopt + optind - last_nonopt], nonopts_size); + + free(temp); + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. 
If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. 
+ But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int option_index; + + optarg = 0; + + /* Initialize the internal data when the first call is made. + Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + if (optind == 0) + { + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (getenv ("POSIXLY_CORRECT") != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + } + + if (nextchar == NULL || *nextchar == '\0') + { + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Now skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + optind++; + last_nonopt = optind; + } + + /* Special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Start decoding its characters. 
*/ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + if (longopts != NULL + && ((argv[optind][0] == '-' + && (argv[optind][1] == '-' || long_only)) +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + )) + { + const struct option *p; + char *s = nextchar; + int exact = 0; + int ambig = 0; + const struct option *pfound = NULL; + int indfound = 0; + extern int strncmp(); + + while (*s && *s != '=') + s++; + + /* Test all options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; + p++, option_index++) + if (!strncmp (p->name, nextchar, s - nextchar)) + { + if (s - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, "%s: option `%s' is ambiguous\n", + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*s) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = s + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + "%s: option `--%s' doesn't allow an argument\n", + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + "%s: option `%c%s' doesn't allow an argument\n", + argv[0], argv[optind - 1][0], pfound->name); + } + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, "%s: option `%s' requires an argument\n", + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, "%s: unrecognized option `--%s'\n", + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, "%s: unrecognized option `%c%s'\n", + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. 
*/ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (c < 040 || c >= 0177) + fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + argv[0], c); + else + fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); + } + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = 0; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + fprintf (stderr, "%s: option `-%c' requires an argument\n", + argv[0], c); + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/awk/getopt.h b/gnu/usr.bin/awk/getopt.h new file mode 100644 index 0000000..de02743 --- /dev/null +++ b/gnu/usr.bin/awk/getopt.h @@ -0,0 +1,128 @@ +/* Declarations for getopt. + Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. 
+ When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. 
*/ + +struct option +{ +#if __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +enum _argtype +{ + no_argument, + required_argument, + optional_argument +}; + +#if __STDC__ +#if defined(__GNU_LIBRARY__) +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* not __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* not __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/gnu/usr.bin/awk/getopt1.c b/gnu/usr.bin/awk/getopt1.c new file mode 100644 index 0000000..e2127cd --- /dev/null +++ b/gnu/usr.bin/awk/getopt1.c @@ -0,0 +1,160 @@ +/* Getopt for GNU. + Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc. + +This file is part of the libiberty library. 
+Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +#ifdef LIBC +/* For when compiled as part of the GNU C library. */ +#include <ansidecl.h> +#endif + +#include "getopt.h" + +#ifndef __STDC__ +#define const +#endif + +#if defined(STDC_HEADERS) || defined(__GNU_LIBRARY__) || defined (LIBC) +#include <stdlib.h> +#else /* STDC_HEADERS or __GNU_LIBRARY__ */ +char *getenv (); +#endif /* STDC_HEADERS or __GNU_LIBRARY__ */ + +#if !defined (NULL) +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. 
*/ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + +#ifdef TEST + +#include <stdio.h> + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == EOF) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/awk/io.c b/gnu/usr.bin/awk/io.c new file mode 100644 index 0000000..7004aed --- /dev/null +++ b/gnu/usr.bin/awk/io.c @@ -0,0 +1,1207 @@ +/* + * io.c --- routines for dealing with input and output and records + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "awk.h" + +#ifndef O_RDONLY +#include <fcntl.h> +#endif + +#if !defined(S_ISDIR) && defined(S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif + +#ifndef atarist +#define INVALID_HANDLE (-1) +#else +#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1) +#endif + +#if defined(MSDOS) || defined(atarist) +#define PIPES_SIMULATED +#endif + +static IOBUF *nextfile P((int skipping)); +static int inrec P((IOBUF *iop)); +static int iop_close P((IOBUF *iop)); +struct redirect *redirect P((NODE *tree, int *errflg)); +static void close_one P((void)); +static int close_redir P((struct redirect *rp)); +#ifndef PIPES_SIMULATED +static int wait_any P((int interesting)); +#endif +static IOBUF *gawk_popen P((char *cmd, struct redirect *rp)); +static IOBUF *iop_open P((char *file, char *how)); +static int gawk_pclose P((struct redirect *rp)); +static int do_pathopen P((char *file)); + +extern FILE *fdopen(); +extern FILE *popen(); + +static struct redirect *red_head = NULL; + +extern int output_is_tty; +extern NODE *ARGC_node; +extern NODE *ARGV_node; +extern NODE *ARGIND_node; +extern NODE *ERRNO_node; +extern NODE **fields_arr; + +static jmp_buf filebuf; /* for do_nextfile() */ + +/* do_nextfile --- implement gawk "next file" extension */ + +void +do_nextfile() +{ + (void) nextfile(1); + longjmp(filebuf, 1); +} + +static IOBUF * +nextfile(skipping) +int skipping; +{ + static int i = 1; + static int files = 0; + NODE *arg; + int fd = INVALID_HANDLE; + static IOBUF *curfile = NULL; + + if (skipping) { + if (curfile != NULL) + iop_close(curfile); + curfile = NULL; + return NULL; + } + if (curfile != NULL) { + if (curfile->cnt == EOF) { + (void) iop_close(curfile); + curfile = NULL; + } else + return curfile; + } + for (; i < (int) (ARGC_node->lnode->numbr); i++) { + arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)); + if (arg->stptr[0] == '\0') + continue; + arg->stptr[arg->stlen] = '\0'; + if (! 
do_unix) { + ARGIND_node->var_value->numbr = i; + ARGIND_node->var_value->flags = NUM|NUMBER; + } + if (!arg_assign(arg->stptr)) { + files++; + curfile = iop_open(arg->stptr, "r"); + if (curfile == NULL) + fatal("cannot open file `%s' for reading (%s)", + arg->stptr, strerror(errno)); + /* NOTREACHED */ + /* This is a kludge. */ + unref(FILENAME_node->var_value); + FILENAME_node->var_value = + dupnode(arg); + FNR = 0; + i++; + break; + } + } + if (files == 0) { + files++; + /* no args. -- use stdin */ + /* FILENAME is init'ed to "-" */ + /* FNR is init'ed to 0 */ + curfile = iop_alloc(fileno(stdin)); + } + return curfile; +} + +void +set_FNR() +{ + FNR = (int) FNR_node->var_value->numbr; +} + +void +set_NR() +{ + NR = (int) NR_node->var_value->numbr; +} + +/* + * This reads in a record from the input file + */ +static int +inrec(iop) +IOBUF *iop; +{ + char *begin; + register int cnt; + int retval = 0; + + cnt = get_a_record(&begin, iop, *RS, NULL); + if (cnt == EOF) { + cnt = 0; + retval = 1; + } else { + NR += 1; + FNR += 1; + } + set_record(begin, cnt, 1); + + return retval; +} + +static int +iop_close(iop) +IOBUF *iop; +{ + int ret; + + if (iop == NULL) + return 0; + errno = 0; + +#ifdef _CRAY + /* Work around bug in UNICOS popen */ + if (iop->fd < 3) + ret = 0; + else +#endif + /* save these for re-use; don't free the storage */ + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->off = iop->buf; + iop->end = iop->buf + strlen(iop->buf); + iop->cnt = 0; + iop->secsiz = 0; + return 0; + } + + /* Don't close standard files or else crufty code elsewhere will lose */ + if (iop->fd == fileno(stdin) || + iop->fd == fileno(stdout) || + iop->fd == fileno(stderr)) + ret = 0; + else + ret = close(iop->fd); + if (ret == -1) + warning("close of fd %d failed (%s)", iop->fd, strerror(errno)); + if ((iop->flag & IOP_NO_FREE) == 0) { + /* + * be careful -- $0 may still reference the buffer even though + * an explicit close is being done; in the future, maybe we + * can do this a 
bit better + */ + if (iop->buf) { + if ((fields_arr[0]->stptr >= iop->buf) + && (fields_arr[0]->stptr < iop->end)) { + NODE *t; + + t = make_string(fields_arr[0]->stptr, + fields_arr[0]->stlen); + unref(fields_arr[0]); + fields_arr [0] = t; + reset_record (); + } + free(iop->buf); + } + free((char *)iop); + } + return ret == -1 ? 1 : 0; +} + +void +do_input() +{ + IOBUF *iop; + extern int exiting; + + if (setjmp(filebuf) != 0) { + } + while ((iop = nextfile(0)) != NULL) { + if (inrec(iop) == 0) + while (interpret(expression_value) && inrec(iop) == 0) + ; + if (exiting) + break; + } +} + +/* Redirection for printf and print commands */ +struct redirect * +redirect(tree, errflg) +NODE *tree; +int *errflg; +{ + register NODE *tmp; + register struct redirect *rp; + register char *str; + int tflag = 0; + int outflag = 0; + char *direction = "to"; + char *mode; + int fd; + char *what = NULL; + + switch (tree->type) { + case Node_redirect_append: + tflag = RED_APPEND; + /* FALL THROUGH */ + case Node_redirect_output: + outflag = (RED_FILE|RED_WRITE); + tflag |= outflag; + if (tree->type == Node_redirect_output) + what = ">"; + else + what = ">>"; + break; + case Node_redirect_pipe: + tflag = (RED_PIPE|RED_WRITE); + what = "|"; + break; + case Node_redirect_pipein: + tflag = (RED_PIPE|RED_READ); + what = "|"; + break; + case Node_redirect_input: + tflag = (RED_FILE|RED_READ); + what = "<"; + break; + default: + fatal ("invalid tree type %d in redirect()", tree->type); + break; + } + tmp = tree_eval(tree->subnode); + if (do_lint && ! 
(tmp->flags & STR)) + warning("expression in `%s' redirection only has numeric value", + what); + tmp = force_string(tmp); + str = tmp->stptr; + if (str == NULL || *str == '\0') + fatal("expression for `%s' redirection has null string value", + what); + if (do_lint + && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen))) + warning("filename `%s' for `%s' redirection may be result of logical expression", str, what); + for (rp = red_head; rp != NULL; rp = rp->next) + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, str, tmp->stlen) + && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag + || (outflag + && (rp->flag & (RED_FILE|RED_WRITE)) == outflag))) + break; + if (rp == NULL) { + emalloc(rp, struct redirect *, sizeof(struct redirect), + "redirect"); + emalloc(str, char *, tmp->stlen+1, "redirect"); + memcpy(str, tmp->stptr, tmp->stlen); + str[tmp->stlen] = '\0'; + rp->value = str; + rp->flag = tflag; + rp->fp = NULL; + rp->iop = NULL; + rp->pid = 0; /* unlikely that we're worried about init */ + rp->status = 0; + /* maintain list in most-recently-used first order */ + if (red_head) + red_head->prev = rp; + rp->prev = NULL; + rp->next = red_head; + red_head = rp; + } + while (rp->fp == NULL && rp->iop == NULL) { + if (rp->flag & RED_EOF) + /* encountered EOF on file or pipe -- must be cleared + * by explicit close() before reading more + */ + return rp; + mode = NULL; + errno = 0; + switch (tree->type) { + case Node_redirect_output: + mode = "w"; + if (rp->flag & RED_USED) + mode = "a"; + break; + case Node_redirect_append: + mode = "a"; + break; + case Node_redirect_pipe: + if ((rp->fp = popen(str, "w")) == NULL) + fatal("can't open pipe (\"%s\") for output (%s)", + str, strerror(errno)); + rp->flag |= RED_NOBUF; + break; + case Node_redirect_pipein: + direction = "from"; + if (gawk_popen(str, rp) == NULL) + fatal("can't open pipe (\"%s\") for input (%s)", + str, strerror(errno)); + break; + case Node_redirect_input: + direction = "from"; + rp->iop 
= iop_open(str, "r"); + break; + default: + cant_happen(); + } + if (mode != NULL) { + fd = devopen(str, mode); + if (fd > INVALID_HANDLE) { + if (fd == fileno(stdin)) + rp->fp = stdin; + else if (fd == fileno(stdout)) + rp->fp = stdout; + else if (fd == fileno(stderr)) + rp->fp = stderr; + else + rp->fp = fdopen(fd, mode); + if (isatty(fd)) + rp->flag |= RED_NOBUF; + } + } + if (rp->fp == NULL && rp->iop == NULL) { + /* too many files open -- close one and try again */ + if (errno == EMFILE) + close_one(); + else { + /* + * Some other reason for failure. + * + * On redirection of input from a file, + * just return an error, so e.g. getline + * can return -1. For output to file, + * complain. The shell will complain on + * a bad command to a pipe. + */ + *errflg = errno; + if (tree->type == Node_redirect_output + || tree->type == Node_redirect_append) + fatal("can't redirect %s `%s' (%s)", + direction, str, strerror(errno)); + else { + free_temp(tmp); + return NULL; + } + } + } + } + free_temp(tmp); + return rp; +} + +static void +close_one() +{ + register struct redirect *rp; + register struct redirect *rplast = NULL; + + /* go to end of list first, to pick up least recently used entry */ + for (rp = red_head; rp != NULL; rp = rp->next) + rplast = rp; + /* now work back up through the list */ + for (rp = rplast; rp != NULL; rp = rp->prev) + if (rp->fp && (rp->flag & RED_FILE)) { + rp->flag |= RED_USED; + errno = 0; + if (fclose(rp->fp)) + warning("close of \"%s\" failed (%s).", + rp->value, strerror(errno)); + rp->fp = NULL; + break; + } + if (rp == NULL) + /* surely this is the only reason ??? 
*/ + fatal("too many pipes or input files open"); +} + +NODE * +do_close(tree) +NODE *tree; +{ + NODE *tmp; + register struct redirect *rp; + + tmp = force_string(tree_eval(tree->subnode)); + for (rp = red_head; rp != NULL; rp = rp->next) { + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, tmp->stptr, tmp->stlen)) + break; + } + free_temp(tmp); + if (rp == NULL) /* no match */ + return tmp_number((AWKNUM) 0.0); + fflush(stdout); /* synchronize regular output */ + tmp = tmp_number((AWKNUM)close_redir(rp)); + rp = NULL; + return tmp; +} + +static int +close_redir(rp) +register struct redirect *rp; +{ + int status = 0; + + if (rp == NULL) + return 0; + if (rp->fp == stdout || rp->fp == stderr) + return 0; + errno = 0; + if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE)) + status = pclose(rp->fp); + else if (rp->fp) + status = fclose(rp->fp); + else if (rp->iop) { + if (rp->flag & RED_PIPE) + status = gawk_pclose(rp); + else { + status = iop_close(rp->iop); + rp->iop = NULL; + } + } + /* SVR4 awk checks and warns about status of close */ + if (status) { + char *s = strerror(errno); + + warning("failure status (%d) on %s close of \"%s\" (%s).", + status, + (rp->flag & RED_PIPE) ? "pipe" : + "file", rp->value, s); + + if (! 
do_unix) { + /* set ERRNO too so that program can get at it */ + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + } + } + if (rp->next) + rp->next->prev = rp->prev; + if (rp->prev) + rp->prev->next = rp->next; + else + red_head = rp->next; + free(rp->value); + free((char *)rp); + return status; +} + +int +flush_io () +{ + register struct redirect *rp; + int status = 0; + + errno = 0; + if (fflush(stdout)) { + warning("error writing standard output (%s).", strerror(errno)); + status++; + } + if (fflush(stderr)) { + warning("error writing standard error (%s).", strerror(errno)); + status++; + } + for (rp = red_head; rp != NULL; rp = rp->next) + /* flush both files and pipes, what the heck */ + if ((rp->flag & RED_WRITE) && rp->fp != NULL) { + if (fflush(rp->fp)) { + warning("%s flush of \"%s\" failed (%s).", + (rp->flag & RED_PIPE) ? "pipe" : + "file", rp->value, strerror(errno)); + status++; + } + } + return status; +} + +int +close_io () +{ + register struct redirect *rp; + register struct redirect *next; + int status = 0; + + errno = 0; + if (fclose(stdout)) { + warning("error writing standard output (%s).", strerror(errno)); + status++; + } + if (fclose(stderr)) { + warning("error writing standard error (%s).", strerror(errno)); + status++; + } + for (rp = red_head; rp != NULL; rp = next) { + next = rp->next; + if (close_redir(rp)) + status++; + rp = NULL; + } + return status; +} + +/* str2mode --- convert a string mode to an integer mode */ + +static int +str2mode(mode) +char *mode; +{ + int ret; + + switch(mode[0]) { + case 'r': + ret = O_RDONLY; + break; + + case 'w': + ret = O_WRONLY|O_CREAT|O_TRUNC; + break; + + case 'a': + ret = O_WRONLY|O_APPEND|O_CREAT; + break; + default: + cant_happen(); + } + return ret; +} + +/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */ + +/* + * This separate version is still needed for output, since file and pipe + * output is done with stdio. 
iop_open() handles input with IOBUFs of + * more "special" files. Those files are not handled here since it makes + * no sense to use them for output. + */ + +int +devopen(name, mode) +char *name, *mode; +{ + int openfd = INVALID_HANDLE; + char *cp, *ptr; + int flag = 0; + struct stat buf; + extern double strtod(); + + flag = str2mode(mode); + + if (do_unix) + goto strictopen; + +#ifdef VMS + if ((openfd = vms_devopen(name, flag)) >= 0) + return openfd; +#endif /* VMS */ + + if (STREQ(name, "-")) + openfd = fileno(stdin); + else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) { + cp = name + 5; + + if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY) + openfd = fileno(stdin); + else if (STREQ(cp, "stdout") && (flag & O_WRONLY) == O_WRONLY) + openfd = fileno(stdout); + else if (STREQ(cp, "stderr") && (flag & O_WRONLY) == O_WRONLY) + openfd = fileno(stderr); + else if (STREQN(cp, "fd/", 3)) { + cp += 3; + openfd = (int)strtod(cp, &ptr); + if (openfd <= INVALID_HANDLE || ptr == cp) + openfd = INVALID_HANDLE; + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) + if (S_ISDIR(buf.st_mode)) + fatal("file `%s' is a directory", name); + return openfd; +} + + +/* spec_setup --- setup an IOBUF for a special internal file */ + +void +spec_setup(iop, len, allocate) +IOBUF *iop; +int len; +int allocate; +{ + char *cp; + + if (allocate) { + emalloc(cp, char *, len+2, "spec_setup"); + iop->buf = cp; + } else { + len = strlen(iop->buf); + iop->buf[len++] = '\n'; /* get_a_record clobbered it */ + iop->buf[len] = '\0'; /* just in case */ + } + iop->off = iop->buf; + iop->cnt = 0; + iop->secsiz = 0; + iop->size = len; + iop->end = iop->buf + len; + iop->fd = -1; + iop->flag = IOP_IS_INTERNAL; +} + +/* specfdopen --- open a fd special file */ + +int +specfdopen(iop, name, mode) +IOBUF *iop; +char *name, *mode; +{ + int fd; + IOBUF *tp; + + fd = devopen(name, mode); + if (fd == 
INVALID_HANDLE) + return INVALID_HANDLE; + tp = iop_alloc(fd); + if (tp == NULL) + return INVALID_HANDLE; + *iop = *tp; + iop->flag |= IOP_NO_FREE; + free(tp); + return 0; +} + +/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */ + +int +pidopen(iop, name, mode) +IOBUF *iop; +char *name, *mode; +{ + char tbuf[BUFSIZ]; + int i; + + if (name[6] == 'g') +/* following #if will improve in 2.16 */ +#if defined(__svr4__) || defined(i860) || defined(_AIX) || defined(BSD4_4) || defined(__386BSD__) + sprintf(tbuf, "%d\n", getpgrp()); +#else + sprintf(tbuf, "%d\n", getpgrp(getpid())); +#endif + else if (name[6] == 'i') + sprintf(tbuf, "%d\n", getpid()); + else + sprintf(tbuf, "%d\n", getppid()); + i = strlen(tbuf); + spec_setup(iop, i, 1); + strcpy(iop->buf, tbuf); + return 0; +} + +/* useropen --- "open" /dev/user */ + +/* + * /dev/user creates a record as follows: + * $1 = getuid() + * $2 = geteuid() + * $3 = getgid() + * $4 = getegid() + * If multiple groups are supported, the $5 through $NF are the + * supplementary group set. 
+ */ + +int +useropen(iop, name, mode) +IOBUF *iop; +char *name, *mode; +{ + char tbuf[BUFSIZ], *cp; + int i; +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + int groupset[NGROUPS_MAX]; + int ngroups; +#endif + + sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid()); + + cp = tbuf + strlen(tbuf); +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + ngroups = getgroups(NGROUPS_MAX, groupset); + if (ngroups == -1) + fatal("could not find groups: %s", strerror(errno)); + + for (i = 0; i < ngroups; i++) { + *cp++ = ' '; + sprintf(cp, "%d", groupset[i]); + cp += strlen(cp); + } +#endif + *cp++ = '\n'; + *cp++ = '\0'; + + + i = strlen(tbuf); + spec_setup(iop, i, 1); + strcpy(iop->buf, tbuf); + return 0; +} + +/* iop_open --- handle special and regular files for input */ + +static IOBUF * +iop_open(name, mode) +char *name, *mode; +{ + int openfd = INVALID_HANDLE; + char *cp, *ptr; + int flag = 0; + int i; + struct stat buf; + IOBUF *iop; + static struct internal { + char *name; + int compare; + int (*fp)(); + IOBUF iob; + } table[] = { + { "/dev/fd/", 8, specfdopen }, + { "/dev/stdin", 10, specfdopen }, + { "/dev/stdout", 11, specfdopen }, + { "/dev/stderr", 11, specfdopen }, + { "/dev/pid", 8, pidopen }, + { "/dev/ppid", 9, pidopen }, + { "/dev/pgrpid", 11, pidopen }, + { "/dev/user", 9, useropen }, + }; + int devcount = sizeof(table) / sizeof(table[0]); + + flag = str2mode(mode); + + if (do_unix) + goto strictopen; + + if (STREQ(name, "-")) + openfd = fileno(stdin); + else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) { + int i; + + for (i = 0; i < devcount; i++) { + if (STREQN(name, table[i].name, table[i].compare)) { + IOBUF *iop = & table[i].iob; + + if (iop->buf != NULL) { + spec_setup(iop, 0, 0); + return iop; + } else if ((*table[i].fp)(iop, name, mode) == 0) + return iop; + else { + warning("could not open %s, mode `%s'", + name, mode); + return NULL; + } + } + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 
0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) + if ((buf.st_mode & S_IFMT) == S_IFDIR) + fatal("file `%s' is a directory", name); + iop = iop_alloc(openfd); + return iop; +} + +#ifndef PIPES_SIMULATED + /* real pipes */ +static int +wait_any(interesting) +int interesting; /* pid of interest, if any */ +{ + SIGTYPE (*hstat)(), (*istat)(), (*qstat)(); + int pid; + int status = 0; + struct redirect *redp; + extern int errno; + + hstat = signal(SIGHUP, SIG_IGN); + istat = signal(SIGINT, SIG_IGN); + qstat = signal(SIGQUIT, SIG_IGN); + for (;;) { +#ifdef NeXT + pid = wait((union wait *)&status); +#else + pid = wait(&status); +#endif /* NeXT */ + if (interesting && pid == interesting) { + break; + } else if (pid != -1) { + for (redp = red_head; redp != NULL; redp = redp->next) + if (pid == redp->pid) { + redp->pid = -1; + redp->status = status; + if (redp->fp) { + pclose(redp->fp); + redp->fp = 0; + } + if (redp->iop) { + (void) iop_close(redp->iop); + redp->iop = 0; + } + break; + } + } + if (pid == -1 && errno == ECHILD) + break; + } + signal(SIGHUP, hstat); + signal(SIGINT, istat); + signal(SIGQUIT, qstat); + return(status); +} + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + int p[2]; + register int pid; + + /* used to wait for any children to synchronize input and output, + * but this could cause gawk to hang when it is started in a pipeline + * and thus has a child process feeding it input (shell dependant) + */ + /*(void) wait_any(0);*/ /* wait for outstanding processes */ + + if (pipe(p) < 0) + fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno)); + if ((pid = fork()) == 0) { + if (close(1) == -1) + fatal("close of stdout in child failed (%s)", + strerror(errno)); + if (dup(p[1]) != 1) + fatal("dup of pipe failed (%s)", strerror(errno)); + if (close(p[0]) == -1 || close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + if (close(0) == -1) + fatal("close of stdin in child failed (%s)", + 
strerror(errno)); + execl("/bin/sh", "sh", "-c", cmd, 0); + _exit(127); + } + if (pid == -1) + fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno)); + rp->pid = pid; + if (close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + return (rp->iop = iop_alloc(p[0])); +} + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + (void) iop_close(rp->iop); + rp->iop = NULL; + + /* process previously found, return stored status */ + if (rp->pid == -1) + return (rp->status >> 8) & 0xFF; + rp->status = wait_any(rp->pid); + rp->pid = -1; + return (rp->status >> 8) & 0xFF; +} + +#else /* PIPES_SIMULATED */ + /* use temporary file rather than pipe */ + +#ifdef VMS +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + FILE *current; + + if ((current = popen(cmd, "r")) == NULL) + return NULL; + return (rp->iop = iop_alloc(fileno(current))); +} + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int rval, aval, fd = rp->iop->fd; + FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */ + + rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */ + rval = iop_close(rp->iop); + rp->iop = NULL; + aval = pclose(kludge); + return (rval < 0 ? rval : aval); +} +#else /* VMS */ + +static +struct { + char *command; + char *name; +} pipes[_NFILE]; + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + extern char *strdup(const char *); + int current; + char *name; + static char cmdbuf[256]; + + /* get a name to use. 
*/ + if ((name = tempnam(".", "pip")) == NULL) + return NULL; + sprintf(cmdbuf,"%s > %s", cmd, name); + system(cmdbuf); + if ((current = open(name,O_RDONLY)) == INVALID_HANDLE) + return NULL; + pipes[current].name = name; + pipes[current].command = strdup(cmd); + rp->iop = iop_alloc(current); + return (rp->iop = iop_alloc(current)); +} + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int cur = rp->iop->fd; + int rval; + + rval = iop_close(rp->iop); + rp->iop = NULL; + + /* check for an open file */ + if (pipes[cur].name == NULL) + return -1; + unlink(pipes[cur].name); + free(pipes[cur].name); + pipes[cur].name = NULL; + free(pipes[cur].command); + return rval; +} +#endif /* VMS */ + +#endif /* PIPES_SIMULATED */ + +NODE * +do_getline(tree) +NODE *tree; +{ + struct redirect *rp = NULL; + IOBUF *iop; + int cnt = EOF; + char *s = NULL; + int errcode; + + while (cnt == EOF) { + if (tree->rnode == NULL) { /* no redirection */ + iop = nextfile(0); + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } else { + int redir_error = 0; + + rp = redirect(tree->rnode, &redir_error); + if (rp == NULL && redir_error) { /* failed redirect */ + if (! do_unix) { + char *s = strerror(redir_error); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = + make_string(s, strlen(s)); + } + return tmp_number((AWKNUM) -1.0); + } + iop = rp->iop; + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } + errcode = 0; + cnt = get_a_record(&s, iop, *RS, & errcode); + if (! do_unix && errcode != 0) { + char *s = strerror(errcode); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + return tmp_number((AWKNUM) -1.0); + } + if (cnt == EOF) { + if (rp) { + /* + * Don't do iop_close() here if we are + * reading from a pipe; otherwise + * gawk_pclose will not be called. 
+ */ + if (!(rp->flag & RED_PIPE)) { + (void) iop_close(iop); + rp->iop = NULL; + } + rp->flag |= RED_EOF; /* sticky EOF */ + return tmp_number((AWKNUM) 0.0); + } else + continue; /* try another file */ + } + if (!rp) { + NR += 1; + FNR += 1; + } + if (tree->lnode == NULL) /* no optional var. */ + set_record(s, cnt, 1); + else { /* assignment to variable */ + Func_ptr after_assign = NULL; + NODE **lhs; + + lhs = get_lhs(tree->lnode, &after_assign); + unref(*lhs); + *lhs = make_string(s, strlen(s)); + (*lhs)->flags |= MAYBE_NUM; + /* we may have to regenerate $0 here! */ + if (after_assign) + (*after_assign)(); + } + } + return tmp_number((AWKNUM) 1.0); +} + +int +pathopen (file) +char *file; +{ + int fd = do_pathopen(file); + +#ifdef DEFAULT_FILETYPE + if (! do_unix && fd <= INVALID_HANDLE) { + char *file_awk; + int save = errno; +#ifdef VMS + int vms_save = vaxc$errno; +#endif + + /* append ".awk" and try again */ + emalloc(file_awk, char *, strlen(file) + + sizeof(DEFAULT_FILETYPE) + 1, "pathopen"); + sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE); + fd = do_pathopen(file_awk); + free(file_awk); + if (fd <= INVALID_HANDLE) { + errno = save; +#ifdef VMS + vaxc$errno = vms_save; +#endif + } + } +#endif /*DEFAULT_FILETYPE*/ + + return fd; +} + +static int +do_pathopen (file) +char *file; +{ + static char *savepath = DEFPATH; /* defined in config.h */ + static int first = 1; + char *awkpath, *cp; + char trypath[BUFSIZ]; + int fd; + + if (STREQ(file, "-")) + return (0); + + if (do_unix) + return (devopen(file, "r")); + + if (first) { + first = 0; + if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath) + savepath = awkpath; /* used for restarting */ + } + awkpath = savepath; + + /* some kind of path name, no search */ +#ifdef VMS /* (strchr not equal implies either or both not NULL) */ + if (strchr(file, ':') != strchr(file, ']') + || strchr(file, '>') != strchr(file, '/')) +#else /*!VMS*/ +#ifdef MSDOS + if (strchr(file, '/') != strchr(file, '\\') + || 
strchr(file, ':') != NULL) +#else + if (strchr(file, '/') != NULL) +#endif /*MSDOS*/ +#endif /*VMS*/ + return (devopen(file, "r")); + + do { + trypath[0] = '\0'; + /* this should take into account limits on size of trypath */ + for (cp = trypath; *awkpath && *awkpath != ENVSEP; ) + *cp++ = *awkpath++; + + if (cp != trypath) { /* nun-null element in path */ + /* add directory punctuation only if needed */ +#ifdef VMS + if (strchr(":]>/", *(cp-1)) == NULL) +#else +#ifdef MSDOS + if (strchr(":\\/", *(cp-1)) == NULL) +#else + if (*(cp-1) != '/') +#endif +#endif + *cp++ = '/'; + /* append filename */ + strcpy (cp, file); + } else + strcpy (trypath, file); + if ((fd = devopen(trypath, "r")) >= 0) + return (fd); + + /* no luck, keep going */ + if(*awkpath == ENVSEP && awkpath[1] != '\0') + awkpath++; /* skip colon */ + } while (*awkpath); + /* + * You might have one of the awk + * paths defined, WITHOUT the current working directory in it. + * Therefore try to open the file in the current directory. + */ + return (devopen(file, "r")); +} diff --git a/gnu/usr.bin/awk/iop.c b/gnu/usr.bin/awk/iop.c new file mode 100644 index 0000000..0d7af12 --- /dev/null +++ b/gnu/usr.bin/awk/iop.c @@ -0,0 +1,318 @@ +/* + * iop.c - do i/o related things. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +#ifndef atarist +#define INVALID_HANDLE (-1) +#else +#include <stddef.h> +#include <fcntl.h> +#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1) +#endif /* atarist */ + + +#ifdef TEST +int bufsize = 8192; + +void +fatal(s) +char *s; +{ + printf("%s\n", s); + exit(1); +} +#endif + +int +optimal_bufsize(fd) +int fd; +{ + struct stat stb; + +#ifdef VMS + /* + * These values correspond with the RMS multi-block count used by + * vms_open() in vms/vms_misc.c. + */ + if (isatty(fd) > 0) + return BUFSIZ; + else if (fstat(fd, &stb) < 0) + return 8*512; /* conservative in case of DECnet access */ + else + return 24*512; + +#else + /* + * System V doesn't have the file system block size in the + * stat structure. So we have to make some sort of reasonable + * guess. We use stdio's BUFSIZ, since that is what it was + * meant for in the first place. + */ +#ifdef BLKSIZE_MISSING +#define DEFBLKSIZE BUFSIZ +#else +#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ) +#endif + +#ifdef TEST + return bufsize; +#else +#ifndef atarist + if (isatty(fd)) +#else + /* + * On ST redirected stdin does not have a name attached + * (this could be hard to do to) and fstat would fail + */ + if (0 == fd || isatty(fd)) +#endif /*atarist */ + return BUFSIZ; +#ifndef BLKSIZE_MISSING + /* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */ + stb.st_blksize = 0; +#endif + if (fstat(fd, &stb) == -1) + fatal("can't stat fd %d (%s)", fd, strerror(errno)); + if (lseek(fd, (off_t)0, 0) == -1) + return DEFBLKSIZE; + return ((int) (stb.st_size < DEFBLKSIZE ? stb.st_size : DEFBLKSIZE)); +#endif /*! TEST */ +#endif /*! 
VMS */ +} + +IOBUF * +iop_alloc(fd) +int fd; +{ + IOBUF *iop; + + if (fd == INVALID_HANDLE) + return NULL; + emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc"); + iop->flag = 0; + if (isatty(fd)) + iop->flag |= IOP_IS_TTY; + iop->size = optimal_bufsize(fd); + iop->secsiz = -2; + errno = 0; + iop->fd = fd; + iop->off = iop->buf = NULL; + iop->cnt = 0; + return iop; +} + +/* + * Get the next record. Uses a "split buffer" where the latter part is + * the normal read buffer and the head part is an "overflow" area that is used + * when a record spans the end of the normal buffer, in which case the first + * part of the record is copied into the overflow area just before the + * normal buffer. Thus, the eventual full record can be returned as a + * contiguous area of memory with a minimum of copying. The overflow area + * is expanded as needed, so that records are unlimited in length. + * We also mark both the end of the buffer and the end of the read() with + * a sentinel character (the current record separator) so that the inside + * loop can run as a single test. + */ +int +get_a_record(out, iop, grRS, errcode) +char **out; +IOBUF *iop; +register int grRS; +int *errcode; +{ + register char *bp = iop->off; + char *bufend; + char *start = iop->off; /* beginning of record */ + int saw_newline; + char rs; + int eat_whitespace; + + if (iop->cnt == EOF) /* previous read hit EOF */ + return EOF; + + if (grRS == 0) { /* special case: grRS == "" */ + rs = '\n'; + eat_whitespace = 0; + saw_newline = 0; + } else + rs = (char) grRS; + + /* set up sentinel */ + if (iop->buf) { + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } else + bufend = NULL; + + for (;;) { /* break on end of record, read error or EOF */ + + /* Following code is entered on the first call of this routine + * for a new iop, or when we scan to the end of the buffer. + * In the latter case, we copy the current partial record to + * the space preceding the normal read buffer. 
If necessary, + * we expand this space. This is done so that we can return + * the record as a contiguous area of memory. + */ + if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) { + char *oldbuf = NULL; + char *oldsplit = iop->buf + iop->secsiz; + long len; /* record length so far */ + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + cant_happen(); + + len = bp - start; + if (len > iop->secsiz) { + /* expand secondary buffer */ + if (iop->secsiz == -2) + iop->secsiz = 256; + while (len > iop->secsiz) + iop->secsiz *= 2; + oldbuf = iop->buf; + emalloc(iop->buf, char *, + iop->size+iop->secsiz+2, "get_a_record"); + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } + if (len > 0) { + char *newsplit = iop->buf + iop->secsiz; + + if (start < oldsplit) { + memcpy(newsplit - len, start, + oldsplit - start); + memcpy(newsplit - (bp - oldsplit), + oldsplit, bp - oldsplit); + } else + memcpy(newsplit - len, start, len); + } + bp = iop->end = iop->off = iop->buf + iop->secsiz; + start = bp - len; + if (oldbuf) { + free(oldbuf); + oldbuf = NULL; + } + } + /* Following code is entered whenever we have no more data to + * scan. In most cases this will read into the beginning of + * the main buffer, but in some cases (terminal, pipe etc.) + * we may be doing smallish reads into more advanced positions. + */ + if (bp >= iop->end) { + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->cnt = EOF; + break; + } + iop->cnt = read(iop->fd, iop->end, bufend - iop->end); + if (iop->cnt == -1) { + if (! 
do_unix && errcode != NULL) { + *errcode = errno; + iop->cnt = EOF; + break; + } else + fatal("error reading input: %s", + strerror(errno)); + } else if (iop->cnt == 0) { + iop->cnt = EOF; + break; + } + iop->end += iop->cnt; + *iop->end = rs; + } + if (grRS == 0) { + extern int default_FS; + + if (default_FS && (bp == start || eat_whitespace)) { + while (bp < iop->end && isspace(*bp)) + bp++; + if (bp == iop->end) { + eat_whitespace = 1; + continue; + } else + eat_whitespace = 0; + } + if (saw_newline && *bp == rs) { + bp++; + break; + } + saw_newline = 0; + } + + while (*bp++ != rs) + ; + + if (bp <= iop->end) { + if (grRS == 0) + saw_newline = 1; + else + break; + } else + bp--; + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + iop->cnt = bp - start; + } + if (iop->cnt == EOF + && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) + return EOF; + + iop->off = bp; + bp--; + if (*bp != rs) + bp++; + *bp = '\0'; + if (grRS == 0) { + if (*--bp == rs) + *bp = '\0'; + else + bp++; + } + + *out = start; + return bp - start; +} + +#ifdef TEST +main(argc, argv) +int argc; +char *argv[]; +{ + IOBUF *iop; + char *out; + int cnt; + char rs[2]; + + rs[0] = 0; + if (argc > 1) + bufsize = atoi(argv[1]); + if (argc > 2) + rs[0] = *argv[2]; + iop = iop_alloc(0); + while ((cnt = get_a_record(&out, iop, rs[0], NULL)) > 0) { + fwrite(out, 1, cnt, stdout); + fwrite(rs, 1, 1, stdout); + } +} +#endif diff --git a/gnu/usr.bin/awk/main.c b/gnu/usr.bin/awk/main.c new file mode 100644 index 0000000..77d0bf7 --- /dev/null +++ b/gnu/usr.bin/awk/main.c @@ -0,0 +1,731 @@ +/* + * main.c -- Expression tree constructors and main program for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "getopt.h" +#include "awk.h" +#include "patchlevel.h" + +static void usage P((int exitval)); +static void copyleft P((void)); +static void cmdline_fs P((char *str)); +static void init_args P((int argc0, int argc, char *argv0, char **argv)); +static void init_vars P((void)); +static void pre_assign P((char *v)); +SIGTYPE catchsig P((int sig, int code)); +static void gawk_option P((char *optstr)); +static void nostalgia P((void)); +static void version P((void)); +char *gawk_name P((char *filespec)); + +#ifdef MSDOS +extern int isatty P((int)); +#endif + +extern void resetup P((void)); + +/* These nodes store all the special variables AWK uses */ +NODE *FS_node, *NF_node, *RS_node, *NR_node; +NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node; +NODE *CONVFMT_node; +NODE *ERRNO_node; +NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node; +NODE *ENVIRON_node, *IGNORECASE_node; +NODE *ARGC_node, *ARGV_node, *ARGIND_node; +NODE *FIELDWIDTHS_node; + +int NF; +int NR; +int FNR; +int IGNORECASE; +char *RS; +char *OFS; +char *ORS; +char *OFMT; +char *CONVFMT; + +/* + * The parse tree and field nodes are stored here. 
Parse_end is a dummy item + * used to free up unneeded fields without freeing the program being run + */ +int errcount = 0; /* error counter, used by yyerror() */ + +/* The global null string */ +NODE *Nnull_string; + +/* The name the program was invoked under, for error messages */ +const char *myname; + +/* A block of AWK code to be run before running the program */ +NODE *begin_block = 0; + +/* A block of AWK code to be run after the last input file */ +NODE *end_block = 0; + +int exiting = 0; /* Was an "exit" statement executed? */ +int exit_val = 0; /* optional exit value */ + +#if defined(YYDEBUG) || defined(DEBUG) +extern int yydebug; +#endif + +struct src *srcfiles = NULL; /* source file name(s) */ +int numfiles = -1; /* how many source files */ + +int do_unix = 0; /* turn off gnu extensions */ +int do_posix = 0; /* turn off gnu and unix extensions */ +int do_lint = 0; /* provide warnings about questionable stuff */ +int do_nostalgia = 0; /* provide a blast from the past */ + +int in_begin_rule = 0; /* we're in a BEGIN rule */ +int in_end_rule = 0; /* we're in a END rule */ + +int output_is_tty = 0; /* control flushing of output */ + +extern char *version_string; /* current version, for printing */ + +NODE *expression_value; + +static struct option optab[] = { + { "compat", no_argument, & do_unix, 1 }, + { "lint", no_argument, & do_lint, 1 }, + { "posix", no_argument, & do_posix, 1 }, + { "nostalgia", no_argument, & do_nostalgia, 1 }, + { "copyleft", no_argument, NULL, 'C' }, + { "copyright", no_argument, NULL, 'C' }, + { "field-separator", required_argument, NULL, 'F' }, + { "file", required_argument, NULL, 'f' }, + { "assign", required_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "usage", no_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'u' }, + { "source", required_argument, NULL, 's' }, +#ifdef DEBUG + { "parsedebug", no_argument, NULL, 'D' }, +#endif + { 0, 0, 0, 0 } +}; + +int +main(argc, argv) +int argc; +char **argv; 
+{ + int c; + char *scan; + extern int optind; + extern int opterr; + extern char *optarg; + int i; + + (void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig); + (void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig); +#ifdef SIGBUS + (void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig); +#endif + + myname = gawk_name(argv[0]); + argv[0] = (char *)myname; +#ifdef VMS + vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ +#endif + + /* remove sccs gunk */ + if (strncmp(version_string, "@(#)", 4) == 0) + version_string += 4; + + if (argc < 2) + usage(1); + + /* initialize the null string */ + Nnull_string = make_string("", 0); + Nnull_string->numbr = 0.0; + Nnull_string->type = Node_val; + Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER); + + /* Set up the special variables */ + + /* + * Note that this must be done BEFORE arg parsing else -F + * breaks horribly + */ + init_vars(); + + /* worst case */ + emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main"); + memset(srcfiles, '\0', argc * sizeof(struct src)); + + /* Tell the regex routines how they should work. . . */ + resetup(); + + /* we do error messages ourselves on invalid options */ + opterr = 0; + + /* the + on the front tells GNU getopt not to rearrange argv */ + while ((c = getopt_long(argc, argv, "+F:f:v:W:", optab, NULL)) != EOF) { + if (do_posix) + opterr = 1; + switch (c) { + case 'F': + cmdline_fs(optarg); + break; + + case 'f': + /* + * a la MKS awk, allow multiple -f options. + * this makes function libraries real easy. + * most of the magic is in the scanner. + */ + /* The following is to allow for whitespace at the end + * of a #! 
/bin/gawk line in an executable file + */ + scan = optarg; + while (isspace(*scan)) + scan++; + ++numfiles; + srcfiles[numfiles].stype = SOURCEFILE; + if (*scan == '\0') + srcfiles[numfiles].val = argv[optind++]; + else + srcfiles[numfiles].val = optarg; + break; + + case 'v': + pre_assign(optarg); + break; + + case 'W': /* gawk specific options */ + gawk_option(optarg); + break; + + /* These can only come from long form options */ + case 'V': + version(); + break; + + case 'C': + copyleft(); + break; + + case 'u': + usage(0); + break; + + case 's': + if (strlen(optarg) == 0) + warning("empty argument to --source ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = optarg; + } + break; + +#ifdef DEBUG + case 'D': + yydebug = 2; + break; +#endif + + case '?': + default: + /* + * New behavior. If not posix, an unrecognized + * option stops argument processing so that it can + * go into ARGV for the awk program to see. This + * makes use of ``#! /bin/gawk -f'' easier. + */ + if (! 
do_posix) + goto out; + /* else + let getopt print error message for us */ + break; + } + } +out: + + if (do_nostalgia) + nostalgia(); + + /* POSIX compliance also implies no Unix extensions either */ + if (do_posix) + do_unix = 1; + +#ifdef DEBUG + setbuf(stdout, (char *) NULL); /* make debugging easier */ +#endif + if (isatty(fileno(stdout))) + output_is_tty = 1; + /* No -f or --source options, use next arg */ + if (numfiles == -1) { + if (optind > argc - 1) /* no args left */ + usage(1); + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = argv[optind]; + optind++; + } + init_args(optind, argc, (char *) myname, argv); + (void) tokexpand(); + + /* Read in the program */ + if (yyparse() || errcount) + exit(1); + + /* Set up the field variables */ + init_fields(); + + if (begin_block) { + in_begin_rule = 1; + (void) interpret(begin_block); + } + in_begin_rule = 0; + if (!exiting && (expression_value || end_block)) + do_input(); + if (end_block) { + in_end_rule = 1; + (void) interpret(end_block); + } + in_end_rule = 0; + if (close_io() != 0 && exit_val == 0) + exit_val = 1; + exit(exit_val); /* more portable */ + return exit_val; /* to suppress warnings */ +} + +/* usage --- print usage information and exit */ + +static void +usage(exitval) +int exitval; +{ + char *opt1 = " -f progfile [--]"; + char *opt2 = " [--] 'program'"; + char *regops = " [POSIX or GNU style options]"; + + version(); + fprintf(stderr, "usage: %s%s%s file ...\n %s%s%s file ...\n", + myname, regops, opt1, myname, regops, opt2); + + /* GNU long options info. Gack. 
*/ + fputs("\nPOSIX options:\t\tGNU long options:\n", stderr); + fputs("\t-f progfile\t\t--file=progfile\n", stderr); + fputs("\t-F fs\t\t\t--field-separator=fs\n", stderr); + fputs("\t-v var=val\t\t--assign=var=val\n", stderr); + fputs("\t-W compat\t\t--compat\n", stderr); + fputs("\t-W copyleft\t\t--copyleft\n", stderr); + fputs("\t-W copyright\t\t--copyright\n", stderr); + fputs("\t-W help\t\t\t--help\n", stderr); + fputs("\t-W lint\t\t\t--lint\n", stderr); +#if 0 + fputs("\t-W nostalgia\t\t--nostalgia\n", stderr); +#endif +#ifdef DEBUG + fputs("\t-W parsedebug\t\t--parsedebug\n", stderr); +#endif + fputs("\t-W posix\t\t--posix\n", stderr); + fputs("\t-W source=program-text\t--source=program-text\n", stderr); + fputs("\t-W usage\t\t--usage\n", stderr); + fputs("\t-W version\t\t--version\n", stderr); + exit(exitval); +} + +static void +copyleft () +{ + static char blurb_part1[] = +"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\ +\n\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2 of the License, or\n\ +(at your option) any later version.\n\ +\n"; + static char blurb_part2[] = +"This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n\ +GNU General Public License for more details.\n\ +\n"; + static char blurb_part3[] = +"You should have received a copy of the GNU General Public License\n\ +along with this program; if not, write to the Free Software\n\ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"; + + version(); + fputs(blurb_part1, stderr); + fputs(blurb_part2, stderr); + fputs(blurb_part3, stderr); + fflush(stderr); +} + +static void +cmdline_fs(str) +char *str; +{ + register NODE **tmp; + int len = strlen(str); + + tmp = get_lhs(FS_node, (Func_ptr *) 0); + unref(*tmp); + /* + * Only if in full compatibility mode check for the stupid special + * case so -F\t works as documented in awk even though the shell + * hands us -Ft. Bleah! + * + * Thankfully, Posix didn't propogate this "feature". + */ + if (str[0] == 't' && str[1] == '\0') { + if (do_lint) + warning("-Ft does not set FS to tab in POSIX awk"); + if (do_unix && ! do_posix) + str[0] = '\t'; + } + *tmp = make_str_node(str, len, SCAN); /* do process escapes */ + set_FS(); +} + +static void +init_args(argc0, argc, argv0, argv) +int argc0, argc; +char *argv0; +char **argv; +{ + int i, j; + NODE **aptr; + + ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL)); + aptr = assoc_lookup(ARGV_node, tmp_number(0.0)); + *aptr = make_string(argv0, strlen(argv0)); + (*aptr)->flags |= MAYBE_NUM; + for (i = argc0, j = 1; i < argc; i++) { + aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j)); + *aptr = make_string(argv[i], strlen(argv[i])); + (*aptr)->flags |= MAYBE_NUM; + j++; + } + ARGC_node = install("ARGC", + node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); +} + +/* + * Set all the special variables to their initial values. 
+ */ +struct varinit { + NODE **spec; + char *name; + NODETYPE type; + char *strval; + AWKNUM numval; + Func_ptr assign; +}; +static struct varinit varinit[] = { +{&NF_node, "NF", Node_NF, 0, -1, set_NF }, +{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, 0 }, +{&NR_node, "NR", Node_NR, 0, 0, set_NR }, +{&FNR_node, "FNR", Node_FNR, 0, 0, set_FNR }, +{&FS_node, "FS", Node_FS, " ", 0, 0 }, +{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, +{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, 0, 0, set_IGNORECASE }, +{&FILENAME_node, "FILENAME", Node_var, "-", 0, 0 }, +{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, +{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, +{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, +{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, +{&RLENGTH_node, "RLENGTH", Node_var, 0, 0, 0 }, +{&RSTART_node, "RSTART", Node_var, 0, 0, 0 }, +{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, 0 }, +{&ARGIND_node, "ARGIND", Node_var, 0, 0, 0 }, +{&ERRNO_node, "ERRNO", Node_var, 0, 0, 0 }, +{0, 0, Node_illegal, 0, 0, 0 }, +}; + +static void +init_vars() +{ + register struct varinit *vp; + + for (vp = varinit; vp->name; vp++) { + *(vp->spec) = install(vp->name, + node(vp->strval == 0 ? 
make_number(vp->numval) + : make_string(vp->strval, strlen(vp->strval)), + vp->type, (NODE *) NULL)); + if (vp->assign) + (*(vp->assign))(); + } +} + +void +load_environ() +{ +#if !defined(MSDOS) && !(defined(VMS) && defined(__DECC)) + extern char **environ; +#endif + register char *var, *val; + NODE **aptr; + register int i; + + ENVIRON_node = install("ENVIRON", + node(Nnull_string, Node_var, (NODE *) NULL)); + for (i = 0; environ[i]; i++) { + static char nullstr[] = ""; + + var = environ[i]; + val = strchr(var, '='); + if (val) + *val++ = '\0'; + else + val = nullstr; + aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen (var))); + *aptr = make_string(val, strlen (val)); + (*aptr)->flags |= MAYBE_NUM; + + /* restore '=' so that system() gets a valid environment */ + if (val != nullstr) + *--val = '='; + } +} + +/* Process a command-line assignment */ +char * +arg_assign(arg) +char *arg; +{ + char *cp; + Func_ptr after_assign = NULL; + NODE *var; + NODE *it; + NODE **lhs; + + cp = strchr(arg, '='); + if (cp != NULL) { + *cp++ = '\0'; + /* + * Recent versions of nawk expand escapes inside assignments. + * This makes sense, so we do it too. 
+ */ + it = make_str_node(cp, strlen(cp), SCAN); + it->flags |= MAYBE_NUM; + var = variable(arg, 0); + lhs = get_lhs(var, &after_assign); + unref(*lhs); + *lhs = it; + if (after_assign) + (*after_assign)(); + *--cp = '='; /* restore original text of ARGV */ + } + return cp; +} + +static void +pre_assign(v) +char *v; +{ + if (!arg_assign(v)) { + fprintf (stderr, + "%s: '%s' argument to -v not in 'var=value' form\n", + myname, v); + usage(1); + } +} + +SIGTYPE +catchsig(sig, code) +int sig, code; +{ +#ifdef lint + code = 0; sig = code; code = sig; +#endif + if (sig == SIGFPE) { + fatal("floating point exception"); + } else if (sig == SIGSEGV +#ifdef SIGBUS + || sig == SIGBUS +#endif + ) { + msg("fatal error: internal error"); + /* fatal won't abort() if not compiled for debugging */ + abort(); + } else + cant_happen(); + /* NOTREACHED */ +} + +/* gawk_option --- do gawk specific things */ + +static void +gawk_option(optstr) +char *optstr; +{ + char *cp; + + for (cp = optstr; *cp; cp++) { + switch (*cp) { + case ' ': + case '\t': + case ',': + break; + case 'v': + case 'V': + /* print version */ + if (strncasecmp(cp, "version", 7) != 0) + goto unknown; + else + cp += 6; + version(); + break; + case 'c': + case 'C': + if (strncasecmp(cp, "copyright", 9) == 0) { + cp += 8; + copyleft(); + } else if (strncasecmp(cp, "copyleft", 8) == 0) { + cp += 7; + copyleft(); + } else if (strncasecmp(cp, "compat", 6) == 0) { + cp += 5; + do_unix = 1; + } else + goto unknown; + break; + case 'n': + case 'N': + /* + * Undocumented feature, + * inspired by nostalgia, and a T-shirt + */ + if (strncasecmp(cp, "nostalgia", 9) != 0) + goto unknown; + nostalgia(); + break; + case 'p': + case 'P': +#ifdef DEBUG + if (strncasecmp(cp, "parsedebug", 10) == 0) { + cp += 9; + yydebug = 2; + break; + } +#endif + if (strncasecmp(cp, "posix", 5) != 0) + goto unknown; + cp += 4; + do_posix = do_unix = 1; + break; + case 'l': + case 'L': + if (strncasecmp(cp, "lint", 4) != 0) + goto unknown; + cp += 3; 
+ do_lint = 1; + break; + case 'H': + case 'h': + if (strncasecmp(cp, "help", 4) != 0) + goto unknown; + cp += 3; + usage(0); + break; + case 'U': + case 'u': + if (strncasecmp(cp, "usage", 5) != 0) + goto unknown; + cp += 4; + usage(0); + break; + case 's': + case 'S': + if (strncasecmp(cp, "source=", 7) != 0) + goto unknown; + cp += 7; + if (strlen(cp) == 0) + warning("empty argument to -Wsource ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = cp; + return; + } + break; + default: + unknown: + fprintf(stderr, "'%c' -- unknown option, ignored\n", + *cp); + break; + } + } +} + +/* nostalgia --- print the famous error message and die */ + +static void +nostalgia() +{ + fprintf(stderr, "awk: bailing out near line 1\n"); + abort(); +} + +/* version --- print version message */ + +static void +version() +{ + fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL); +} + +/* static */ +char * +gawk_name(filespec) +char *filespec; +{ + char *p; + +#ifdef VMS /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */ + char *q; + + p = strrchr(filespec, ']'); /* directory punctuation */ + q = strrchr(filespec, '>'); /* alternate <international> punct */ + + if (p == NULL || q > p) p = q; + p = strdup(p == NULL ? filespec : (p + 1)); + if ((q = strrchr(p, '.')) != NULL) *q = '\0'; /* strip .typ;vers */ + + return p; +#endif /*VMS*/ + +#if defined(MSDOS) || defined(atarist) + char *q; + + p = filespec; + + if (q = strrchr(p, '\\')) + p = q + 1; + if (q = strchr(p, '.')) + *q = '\0'; + strlwr(p); + + return (p == NULL ? filespec : p); +#endif /* MSDOS || atarist */ + + /* "path/name" -> "name" */ + p = strrchr(filespec, '/'); + return (p == NULL ? 
filespec : p + 1); +} diff --git a/gnu/usr.bin/awk/msg.c b/gnu/usr.bin/awk/msg.c new file mode 100644 index 0000000..b60fe9d --- /dev/null +++ b/gnu/usr.bin/awk/msg.c @@ -0,0 +1,106 @@ +/* + * msg.c - routines for error messages + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +int sourceline = 0; +char *source = NULL; + +/* VARARGS2 */ +void +err(s, emsg, argp) +char *s; +char *emsg; +va_list argp; +{ + char *file; + + (void) fflush(stdout); + (void) fprintf(stderr, "%s: ", myname); + if (sourceline) { + if (source) + (void) fprintf(stderr, "%s:", source); + else + (void) fprintf(stderr, "cmd. 
line:"); + + (void) fprintf(stderr, "%d: ", sourceline); + } + if (FNR) { + file = FILENAME_node->var_value->stptr; + if (file) + (void) fprintf(stderr, "(FILENAME=%s ", file); + (void) fprintf(stderr, "FNR=%d) ", FNR); + } + (void) fprintf(stderr, s); + vfprintf(stderr, emsg, argp); + (void) fprintf(stderr, "\n"); + (void) fflush(stderr); +} + +/*VARARGS0*/ +void +msg(va_alist) +va_dcl +{ + va_list args; + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); + err("", mesg, args); + va_end(args); +} + +/*VARARGS0*/ +void +warning(va_alist) +va_dcl +{ + va_list args; + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); + err("warning: ", mesg, args); + va_end(args); +} + +/*VARARGS0*/ +void +fatal(va_alist) +va_dcl +{ + va_list args; + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); + err("fatal: ", mesg, args); + va_end(args); +#ifdef DEBUG + abort(); +#endif + exit(2); +} diff --git a/gnu/usr.bin/awk/node.c b/gnu/usr.bin/awk/node.c new file mode 100644 index 0000000..65ecb0e --- /dev/null +++ b/gnu/usr.bin/awk/node.c @@ -0,0 +1,429 @@ +/* + * node.c -- routines for node management + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. 
If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +extern double strtod(); + +AWKNUM +r_force_number(n) +register NODE *n; +{ + register char *cp; + register char *cpend; + char save; + char *ptr; + unsigned int newflags = 0; + +#ifdef DEBUG + if (n == NULL) + cant_happen(); + if (n->type != Node_val) + cant_happen(); + if(n->flags == 0) + cant_happen(); + if (n->flags & NUM) + return n->numbr; +#endif + + /* all the conditionals are an attempt to avoid the expensive strtod */ + + n->numbr = 0.0; + n->flags |= NUM; + + if (n->stlen == 0) + return 0.0; + + cp = n->stptr; + if (isalpha(*cp)) + return 0.0; + + cpend = cp + n->stlen; + while (cp < cpend && isspace(*cp)) + cp++; + if (cp == cpend || isalpha(*cp)) + return 0.0; + + if (n->flags & MAYBE_NUM) { + newflags = NUMBER; + n->flags &= ~MAYBE_NUM; + } + if (cpend - cp == 1) { + if (isdigit(*cp)) { + n->numbr = (AWKNUM)(*cp - '0'); + n->flags |= newflags; + } + return n->numbr; + } + + errno = 0; + save = *cpend; + *cpend = '\0'; + n->numbr = (AWKNUM) strtod((const char *)cp, &ptr); + + /* POSIX says trailing space is OK for NUMBER */ + while (isspace(*ptr)) + ptr++; + *cpend = save; + /* the >= should be ==, but for SunOS 3.5 strtod() */ + if (errno == 0 && ptr >= cpend) + n->flags |= newflags; + else + errno = 0; + + return n->numbr; +} + +/* + * the following lookup table is used as an optimization in force_string + * (more complicated) variations on this theme didn't seem to pay off, but + * systematic testing might be in order at some point + */ +static char *values[] = { + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", +}; +#define NVAL (sizeof(values)/sizeof(values[0])) + +NODE * +r_force_string(s) +register NODE *s; +{ + char buf[128]; + register char *sp = buf; + register long num = 0; + +#ifdef DEBUG + if (s == NULL) cant_happen(); + if (s->type != Node_val) cant_happen(); + if (s->flags & STR) return s; + if 
(!(s->flags & NUM)) cant_happen(); + if (s->stref != 0) ; /*cant_happen();*/ +#endif + + /* avoids floating point exception in DOS*/ + if ( s->numbr <= LONG_MAX && s->numbr >= -LONG_MAX) + num = (long)s->numbr; + if ((AWKNUM) num == s->numbr) { /* integral value */ + if (num < NVAL && num >= 0) { + sp = values[num]; + s->stlen = 1; + } else { + (void) sprintf(sp, "%ld", num); + s->stlen = strlen(sp); + } + s->stfmt = -1; + } else { + (void) sprintf(sp, CONVFMT, s->numbr); + s->stlen = strlen(sp); + s->stfmt = (char)CONVFMTidx; + } + s->stref = 1; + emalloc(s->stptr, char *, s->stlen + 2, "force_string"); + memcpy(s->stptr, sp, s->stlen+1); + s->flags |= STR; + return s; +} + +/* + * Duplicate a node. (For strings, "duplicate" means crank up the + * reference count.) + */ +NODE * +dupnode(n) +NODE *n; +{ + register NODE *r; + + if (n->flags & TEMP) { + n->flags &= ~TEMP; + n->flags |= MALLOC; + return n; + } + if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) { + if (n->stref < 255) + n->stref++; + return n; + } + getnode(r); + *r = *n; + r->flags &= ~(PERM|TEMP); + r->flags |= MALLOC; + if (n->type == Node_val && (n->flags & STR)) { + r->stref = 1; + emalloc(r->stptr, char *, r->stlen + 2, "dupnode"); + memcpy(r->stptr, n->stptr, r->stlen+1); + } + return r; +} + +/* this allocates a node with defined numbr */ +NODE * +mk_number(x, flags) +AWKNUM x; +unsigned int flags; +{ + register NODE *r; + + getnode(r); + r->type = Node_val; + r->numbr = x; + r->flags = flags; +#ifdef DEBUG + r->stref = 1; + r->stptr = 0; + r->stlen = 0; +#endif + return r; +} + +/* + * Make a string node. 
+ */ +NODE * +make_str_node(s, len, flags) +char *s; +size_t len; +int flags; +{ + register NODE *r; + + getnode(r); + r->type = Node_val; + r->flags = (STRING|STR|MALLOC); + if (flags & ALREADY_MALLOCED) + r->stptr = s; + else { + emalloc(r->stptr, char *, len + 2, s); + memcpy(r->stptr, s, len); + } + r->stptr[len] = '\0'; + + if (flags & SCAN) { /* scan for escape sequences */ + char *pf; + register char *ptm; + register int c; + register char *end; + + end = &(r->stptr[len]); + for (pf = ptm = r->stptr; pf < end;) { + c = *pf++; + if (c == '\\') { + c = parse_escape(&pf); + if (c < 0) { + if (do_lint) + warning("backslash at end of string"); + c = '\\'; + } + *ptm++ = c; + } else + *ptm++ = c; + } + len = ptm - r->stptr; + erealloc(r->stptr, char *, len + 1, "make_str_node"); + r->stptr[len] = '\0'; + r->flags |= PERM; + } + r->stlen = len; + r->stref = 1; + r->stfmt = -1; + + return r; +} + +NODE * +tmp_string(s, len) +char *s; +size_t len; +{ + register NODE *r; + + r = make_string(s, len); + r->flags |= TEMP; + return r; +} + + +#define NODECHUNK 100 + +NODE *nextfree = NULL; + +NODE * +more_nodes() +{ + register NODE *np; + + /* get more nodes and initialize list */ + emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode"); + for (np = nextfree; np < &nextfree[NODECHUNK - 1]; np++) + np->nextp = np + 1; + np->nextp = NULL; + np = nextfree; + nextfree = nextfree->nextp; + return np; +} + +#ifdef DEBUG +void +freenode(it) +NODE *it; +{ +#ifdef MPROF + it->stref = 0; + free((char *) it); +#else /* not MPROF */ + /* add it to head of freelist */ + it->nextp = nextfree; + nextfree = it; +#endif /* not MPROF */ +} +#endif /* DEBUG */ + +void +unref(tmp) +register NODE *tmp; +{ + if (tmp == NULL) + return; + if (tmp->flags & PERM) + return; + if (tmp->flags & (MALLOC|TEMP)) { + tmp->flags &= ~TEMP; + if (tmp->flags & STR) { + if (tmp->stref > 1) { + if (tmp->stref != 255) + tmp->stref--; + return; + } + free(tmp->stptr); + } + freenode(tmp); + } +} + +/* + 
* Parse a C escape sequence. STRING_PTR points to a variable containing a + * pointer to the string to parse. That pointer is updated past the + * characters we use. The value of the escape sequence is returned. + * + * A negative value means the sequence \ newline was seen, which is supposed to + * be equivalent to nothing at all. + * + * If \ is followed by a null character, we return a negative value and leave + * the string pointer pointing at the null character. + * + * If \ is followed by 000, we return 0 and leave the string pointer after the + * zeros. A value of 0 does not mean end of string. + * + * Posix doesn't allow \x. + */ + +int +parse_escape(string_ptr) +char **string_ptr; +{ + register int c = *(*string_ptr)++; + register int i; + register int count; + + switch (c) { + case 'a': + return BELL; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\n': + return -2; + case 0: + (*string_ptr)--; + return -1; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + i = c - '0'; + count = 0; + while (++count < 3) { + if ((c = *(*string_ptr)++) >= '0' && c <= '7') { + i *= 8; + i += c - '0'; + } else { + (*string_ptr)--; + break; + } + } + return i; + case 'x': + if (do_lint) { + static int didwarn; + + if (! 
didwarn) { + didwarn = 1; + warning("Posix does not allow \"\\x\" escapes"); + } + } + if (do_posix) + return ('x'); + i = 0; + while (1) { + if (isxdigit((c = *(*string_ptr)++))) { + i *= 16; + if (isdigit(c)) + i += c - '0'; + else if (isupper(c)) + i += c - 'A' + 10; + else + i += c - 'a' + 10; + } else { + (*string_ptr)--; + break; + } + } + return i; + default: + return c; + } +} diff --git a/gnu/usr.bin/awk/patchlevel.h b/gnu/usr.bin/awk/patchlevel.h new file mode 100644 index 0000000..c6161a1 --- /dev/null +++ b/gnu/usr.bin/awk/patchlevel.h @@ -0,0 +1 @@ +#define PATCHLEVEL 2 diff --git a/gnu/usr.bin/awk/protos.h b/gnu/usr.bin/awk/protos.h new file mode 100644 index 0000000..25af321 --- /dev/null +++ b/gnu/usr.bin/awk/protos.h @@ -0,0 +1,115 @@ +/* + * protos.h -- function prototypes for when the headers don't have them. + */ + +/* + * Copyright (C) 1991, 1992, the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifdef __STDC__ +#define aptr_t void * /* arbitrary pointer type */ +#else +#define aptr_t char * +#endif +extern aptr_t malloc P((MALLOC_ARG_T)); +extern aptr_t realloc P((aptr_t, MALLOC_ARG_T)); +extern aptr_t calloc P((MALLOC_ARG_T, MALLOC_ARG_T)); + +extern void free P((aptr_t)); +extern char *getenv P((char *)); + +extern char *strcpy P((char *, const char *)); +extern char *strcat P((char *, const char *)); +extern char *strncpy P((char *, const char *, int)); +extern int strcmp P((const char *, const char *)); +extern int strncmp P((const char *, const char *, int)); +#ifndef VMS +extern char *strerror P((int)); +#else +extern char *strerror P((int,...)); +#endif +extern char *strchr P((const char *, int)); +extern char *strrchr P((const char *, int)); +extern char *strstr P((const char *s1, const char *s2)); +extern int strlen P((const char *)); +extern long strtol P((const char *, char **, int)); +#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__) +extern int strftime P((char *, int, const char *, const struct tm *)); +#endif +extern time_t time P((time_t *)); +extern aptr_t memset P((aptr_t, int, size_t)); +extern aptr_t memcpy P((aptr_t, const aptr_t, size_t)); +extern aptr_t memmove P((aptr_t, const aptr_t, size_t)); +extern aptr_t memchr P((const aptr_t, int, size_t)); +extern int memcmp P((const aptr_t, const aptr_t, size_t)); + +/* extern int fprintf P((FILE *, char *, ...)); */ +extern int fprintf P(()); +#if !defined(MSDOS) && !defined(__GNU_LIBRARY__) +extern int fwrite P((const char *, int, int, FILE *)); +extern int fputs P((const char *, FILE *)); +extern int unlink P((const char *)); +#endif +extern int fflush P((FILE *)); +extern int fclose P((FILE *)); +extern FILE *popen P((const char *, const char *)); +extern int pclose P((FILE *)); +extern void abort P(()); +extern int isatty P((int)); +extern void exit P((int)); +extern int system P((const char *)); +extern int sscanf P((/* char *, char *, ... 
*/)); +#ifndef toupper +extern int toupper P((int)); +#endif +#ifndef tolower +extern int tolower P((int)); +#endif + +extern double pow P((double x, double y)); +extern double atof P((char *)); +extern double strtod P((const char *, char **)); +extern int fstat P((int, struct stat *)); +extern int stat P((const char *, struct stat *)); +extern off_t lseek P((int, off_t, int)); +extern int fseek P((FILE *, long, int)); +extern int close P((int)); +extern int creat P(()); +extern int open P(()); +extern int pipe P((int *)); +extern int dup P((int)); +extern int dup2 P((int,int)); +extern int fork P(()); +extern int execl P((/* char *, char *, ... */)); +extern int read P((int, char *, int)); +extern int wait P((int *)); +extern void _exit P((int)); + +#ifndef __STDC__ +extern long time P((long *)); +#endif + +#ifdef NON_STD_SPRINTF +extern char *sprintf(); +#else +extern int sprintf(); +#endif /* SPRINTF_INT */ + +#undef aptr_t diff --git a/gnu/usr.bin/awk/re.c b/gnu/usr.bin/awk/re.c new file mode 100644 index 0000000..495b096 --- /dev/null +++ b/gnu/usr.bin/awk/re.c @@ -0,0 +1,208 @@ +/* + * re.c - compile regular expressions. + */ + +/* + * Copyright (C) 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. 
If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +/* Generate compiled regular expressions */ + +Regexp * +make_regexp(s, len, ignorecase, dfa) +char *s; +int len; +int ignorecase; +int dfa; +{ + Regexp *rp; + char *err; + char *src = s; + char *temp; + char *end = s + len; + register char *dest; + register int c; + + /* Handle escaped characters first. */ + + /* Build a copy of the string (in dest) with the + escaped characters translated, and generate the regex + from that. + */ + emalloc(dest, char *, len + 2, "make_regexp"); + temp = dest; + + while (src < end) { + if (*src == '\\') { + c = *++src; + switch (c) { + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case 'x': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c = parse_escape(&src); + if (c < 0) + cant_happen(); + *dest++ = (char)c; + break; + default: + *dest++ = '\\'; + *dest++ = (char)c; + src++; + break; + } /* switch */ + } else { + *dest++ = *src++; /* not '\\' */ + } + } /* for */ + + *dest = '\0' ; /* Only necessary if we print dest ? 
*/ + emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); + memset((char *) rp, 0, sizeof(*rp)); + emalloc(rp->pat.buffer, char *, 16, "make_regexp"); + rp->pat.allocated = 16; + emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); + + if (ignorecase) + rp->pat.translate = casetable; + else + rp->pat.translate = NULL; + len = dest - temp; + if ((err = re_compile_pattern(temp, (size_t) len, &(rp->pat))) != NULL) + fatal("%s: /%s/", err, temp); + if (dfa && !ignorecase) { + regcompile(temp, len, &(rp->dfareg), 1); + rp->dfa = 1; + } else + rp->dfa = 0; + free(temp); + return rp; +} + +int +research(rp, str, start, len, need_start) +Regexp *rp; +register char *str; +int start; +register int len; +int need_start; +{ + char *ret = str; + + if (rp->dfa) { + char save1; + char save2; + int count = 0; + int try_backref; + + save1 = str[start+len]; + str[start+len] = '\n'; + save2 = str[start+len+1]; + ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1, + &count, &try_backref); + str[start+len] = save1; + str[start+len+1] = save2; + } + if (ret) { + if (need_start || rp->dfa == 0) + return re_search(&(rp->pat), str, start+len, start, + len, &(rp->regs)); + else + return 1; + } else + return -1; +} + +void +refree(rp) +Regexp *rp; +{ + free(rp->pat.buffer); + free(rp->pat.fastmap); + if (rp->dfa) + reg_free(&(rp->dfareg)); + free(rp); +} + +void +reg_error(s) +const char *s; +{ + fatal(s); +} + +Regexp * +re_update(t) +NODE *t; +{ + NODE *t1; + +# define CASE 1 + if ((t->re_flags & CASE) == IGNORECASE) { + if (t->re_flags & CONST) + return t->re_reg; + t1 = force_string(tree_eval(t->re_exp)); + if (t->re_text) { + if (cmp_nodes(t->re_text, t1) == 0) { + free_temp(t1); + return t->re_reg; + } + unref(t->re_text); + } + t->re_text = dupnode(t1); + free_temp(t1); + } + if (t->re_reg) + refree(t->re_reg); + if (t->re_cnt) + t->re_cnt++; + if (t->re_cnt > 10) + t->re_cnt = 0; + if (!t->re_text) { + t1 = force_string(tree_eval(t->re_exp)); + t->re_text = dupnode(t1); + 
free_temp(t1); + } + t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, IGNORECASE, t->re_cnt); + t->re_flags &= ~CASE; + t->re_flags |= IGNORECASE; + return t->re_reg; +} + +void +resetup() +{ + (void) re_set_syntax(RE_SYNTAX_AWK); + regsyntax(RE_SYNTAX_AWK, 0); +} diff --git a/gnu/usr.bin/awk/version.c b/gnu/usr.bin/awk/version.c new file mode 100644 index 0000000..adea5fa --- /dev/null +++ b/gnu/usr.bin/awk/version.c @@ -0,0 +1,46 @@ +char *version_string = "@(#)Gnu Awk (gawk) 2.15"; + +/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead + of the Right Hand Side */ + +/* 1.03 Fixed split() to treat strings of space and tab as FS if + the split char is ' '. + + Added -v option to print version number + + Fixed bug that caused rounding when printing large numbers */ + +/* 2.00beta Incorporated the functionality of the "new" awk as described + the book (reference not handy). Extensively tested, but no + doubt still buggy. Badly needs tuning and cleanup, in + particular in memory management which is currently almost + non-existent. */ + +/* 2.01 JF: Modified to compile under GCC, and fixed a few + bugs while I was at it. I hope I didn't add any more. + I modified parse.y to reduce the number of reduce/reduce + conflicts. There are still a few left. */ + +/* 2.02 Fixed JF's bugs; improved memory management, still needs + lots of work. */ + +/* 2.10 Major grammar rework and lots of bug fixes from David. + Major changes for performance enhancements from David. + A number of minor bug fixes and new features from Arnold. + Changes for MSDOS from Conrad Kwok and Scott Garfinkle. + The gawk.texinfo and info files included! */ + +/* 2.11 Bug fix release to 2.10. Lots of changes for portability, + speed, and configurability. */ + +/* 2.12 Lots of changes for portability, speed, and configurability. + Several bugs fixed. POSIX compliance. Removal of last set + of hard-wired limits. Atari and VMS ports added. 
*/ + +/* 2.13 Public release of 2.12 */ + +/* 2.14 Mostly bug fixes. */ + +/* 2.15 Bug fixes plus intermixing of command-line source and files, + GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. */ + diff --git a/gnu/usr.bin/tar/COPYING b/gnu/usr.bin/tar/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/gnu/usr.bin/tar/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/gnu/usr.bin/tar/ChangeLog b/gnu/usr.bin/tar/ChangeLog new file mode 100644 index 0000000..7934f2b --- /dev/null +++ b/gnu/usr.bin/tar/ChangeLog @@ -0,0 +1,1732 @@ +Thu Mar 25 13:32:40 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * version.c: Released version 1.11.2. + + * Makefile.in (dist): Do the link differently; some of the + files have changed filesystems which makes it more complex. + + * Makefile.in (dist, shar): Use gzip instead of compress. + + * create.c (dump_file): Test for curdev==-1, not curdev<0. + Some losing NFS systems give negative device numbers + sometimes. + +Thu Mar 25 11:55:15 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * level-0, level-1 (TAR_PART1): Use `--block-size', not just + `--block', which is now ambiguous. + +Wed Mar 24 22:12:51 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * backup-specs (TAR): New variable. + + * level-0, level-1 (TAR_PART1): Get path of GNU tar from `TAR' + variable, don't hardcode it. + +Sat Mar 20 00:20:05 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * backup-specs (SLEEP_MESSAGE): put backslashes in front of nested + double quotes. + + * level-0, level-1 (BACKUP_DIRS): Don't put in quotes. + (LOGFILE): Use sed to construct name, not awk. + + * dump-remind (recipients): Replaced inefficient pipeline with a + single, simple sed script. + (volno): Deal with the possibility that VOLNO_FILE may not be + created yet. 
+ +Fri Mar 19 15:05:15 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * backup-specs (VOLNO_FILE): Removed abusive comment by Noah. + + * buffer.c (new_volume): Write the global volume number to the + volno file before running the info script, so that the script + can look at it. + +Thu Mar 18 20:11:54 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * Makefile.in (AUX): Include `dump-remind' in distribution. + + * backup-specs (SLEEP_MESSAGE): New variable. + level-0, level-1: Use it instead of external `dont_touch' file. + + * level-0, level-1: Put most of the script in () and pipe + everything from the subshell through tee -a $LOGFILE. Since you + really want most of the output to go to the logfile anyway, and + since all those pipelines were preventing one from getting the + exit status of most commands, this seems like the right idea. + + * level-0, level-1 (LOGFILE): Use YYYY-MM-DD (all numeric) format + for log file name, since that makes the file names sortable in a + coherent way. Suffix should always be `level-n' where n is the + dump level. level-0 script was just using `-full' instead. + + * level-0, level-1 (DUMP_LEVEL): New variable. Set to `0' or `1' + in each script as appropriate. + + * level-0, level-1 (HOST): Renamed to `localhost' for clarity. + (host): renamed to `remotehost' for clarity. + + * level-0, level-1 (startdate): New variable. Use it in Subject + line of mailed report. + + * level-0, level-1: Fixed all instances where sed is called with a + script on the command line to use `-e' option. + + * level-0, level-1: Don't try to call logfile.sed to filter + LOGFILE. It's not distributed with tar and was never really used + anyway. + + * level-0, level-1: Put quotes around most variable names (barring + those that are known to intentionally contain text that should be + expanded into multiple words, like `TAR_PART1'). + + * level-0, level-1: Got rid of annoying trailing backslashes in awk + scripts. They were gratuitous. 
Made them a little more readable + by adding some whitespace. + +Wed Mar 17 10:30:58 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * tar.c (describe, long_options): Changed --compress-block to + --block-compress. + (options): Fixed f_compress_block sanity check error message + to give the correct name of the option. + +Tue Mar 16 14:52:40 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * extract.c (extract_archive): case LF_DIR: Do chown when + necessary. Don't bother jumping to set_filestat for + f_modified; repeat the chmod code here. Replace `break', + deleted on 2 September 1992. + + * tar.c (describe, long_options, options): Added gzip options + and use-compress-program option. + * tar.h: Added new compression options. + * buffer.c (child_open, open_archive): Use new compression options. + + * create.c (start_header): Only mask off high bits when + creating old-style archives. + * list.c (decode_header): Mask off potentially misleading + high bits from the mode when reading headers. + +Mon Mar 15 11:34:34 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * extract.c (extract_archive): Put arguments in the right + order for error message. + + * create.c (deal_with_sparse): if the last byte was null, we + didn't write it out. + + * gnu.c, create.c, extract.c, diffarch.c, list.c throughout: + Replace malloc calls with ck_malloc and realloc with ck_realloc. + + * tar.c (describe): Improve doc for -L. + + * tar.c (name_next): Don't apply exclusion to explicitly named + files. + + * tar.c (long_options, describe): Added new-volume-script as + an alias for info-script. + + * extract.c (extract_archive): LF_DUMPDIR case; misplaced paren. + + * extract.c (extract_archive): extract_file case, first if, + include space for null in namelen computation. + + * extract.c (extract_sparse_file): Use value returned by write + to properly create error message. + + * create.c (create_archive): Don't assume we have anything to + dump. 
+ + * buffer.c (open_archive): Set current_file_name for the + volume header so that verbose listings work properly. + + * Makefile.in (realclean): Added getdate.c. + +Thu Jan 14 23:38:44 1993 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu) + + * tar.c: Include fnmatch.h after port.h to make sure we get our FNM_* + (e.g. on HPUX 8). + +Tue Nov 24 08:30:54 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) + + * tar.c (addname), gnu.c (read_dir_file): Use HAVE_GETCWD, not USG. + + * port.h, rmt.h: Use HAVE_STRING_H, not USG. + + * port.h: Add dir header decls. + * create.c, gnu.c: Use SYSNDIR, SYSDIR, and NDIR + instead of BSD42 and USG. Rename DP_NAMELEN to NLENGTH. + Use `struct dirent' instead of `struct direct'. + * create.c, gnu.c, tar.c: Remove dir header decls. + +Wed Nov 18 15:31:30 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) + + * tar.c: Change FNM_TARPATH to FNM_LEADING_DIR to match change + in fnmatch.[ch]. + +Wed Oct 21 00:52:24 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * level-0, level-1: put curly braces around variables for clarity. + + * backup-specs (DUMP_REMIND_SCRIPT): define it (but commented out + so that distributed dump scripts won't use it by default). + level-0, level-1 (TAR_PART1): use --info-script if + DUMP_REMIND_SCRIPT is defined. + dump-remind: new file (intended as an example). + +Thu Oct 15 03:33:28 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * level-0, level-1: remove $LOGFILE.tmp files before exiting. + +Fri Oct 2 00:28:01 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) + + * tar.c (describe): Fix some tab alignments. + + * Makefile.in (SRC3): Add getdate.c, for systems without bison/yacc + (like MS-DOS). + + * diffarch.c (diff_sparse_files): Add missing arg to fprintf calls. + + * extract.c (extract_archive, restore_saved_dir_info), + buffer.c (child_open), list.c (decode_header, print_header): + Delete unused vars. + + * port.c [__MSDOS__]: Have strstr, rename, and mkdir. 
Don't + define ck_pipe. + + * buffer.c, tar.c (init_volume_number, closeout_volume_number), + create.c (write_long): Declare as void, not int, since they + don't return a value. + +Thu Sep 24 00:06:02 1992 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * level-0, level-1 (TAR_PART1): remove --atime-preserve + because of a total screw. + +Tue Sep 22 14:15:48 1992 Michael I Bushnell (mib@wookumz.gnu.ai.mit.edu) + + * buffer.c (close_archive): Removed leftover `break' from when + this was a switch. + +Tue Sep 22 08:33:16 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * create.c, port.h: indented #pragma directives with 1 space. + +Fri Sep 18 14:15:17 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * All source files: re indented using GNU indent. + + * rtapelib.c (__rmt_read): Only read the amount left in the + buffer; otherwise a broken rmt server (which puts too much + data out) could overwrite past our buffer. + +Thu Sep 17 14:08:58 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * create.c: Throughout, use struct utimbuf rather than array + of longs. + + * configure.in: Check for getpwuid and getgrgid. + + * Makefile.in (SRC3, AUX): Move alloca.c to SRC3. + (OBJ3): Add @ALLOCA@. + + * Makefile.in (getdate.c): Look in srcdir for getdate.y. + + * buffer.c (close_archive): We can't check WTERMSIG + meaningfully unless we already know that WIFSIGNALED is true. + (There is no guarantee that WTERMSIG will return zero when + WIFSIGNALED is false.) + * port.c (rmdir, mkdir): Check WIFSIGNALED rather than + WTERMSIG. + + * Makefile.in (getdate.c): Use $(YACC) instead of `yacc'. + +Tue Sep 15 14:49:48 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * version.c: Released version 1.11.1. + + * Makefile (AUX): Added NEWS. + + * Makefile.in (rmt): Added $(LIBS). + * configure.in: Added tests for libraries needed on Solaris. + + * mangle.c (extract_mangle): Null terminate link name for + losing archives missing it. 
+ + * Makefile.in: added target and rule for getdate.c: getdate.y; + some makes don't have one built in. + +Mon Sep 14 16:23:15 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * tar.c (options, main): Advise use of --help rather than + +help. + + * create.c (write_long): Using hstat here is a Bad Idea, and + totally unnecessary at that. + + * list.c (read_header): Compute both signed and normal + checksums. + + * configure.in: Define BSD in the presence of /sdmach or + /../../mach. + + * diffarch.c, buffer.c: Declare valloc as void* rather than + char*. + + * Makefile.in: Don't install info files. + + * configure.in: Check for malloc was scrambled. + + * port.h: Undefine index and rindex if necessary; some + string.h's define them for us. + + * tar.c (addname): Missing braces after if. + * gnu.c (read_dir_file): Missing braces after if. + + * names.c: Add include of <stdio.h>, + + * create.c (start_header): Set current_file_name so that + print_header (used for verbose create) works properly. + (dump_file): Set current_link_name when setting up symlink + and hardlink records. + +Fri Sep 11 01:05:52 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * fnmatch.[ch]: New files. + * wildmat.c: File removed. + * tar.c: Include fnmatch.h and use fnmatch instead of wildmat. + * Makefile.in, makefile.pc: Replace wildmat.o(bj) with fnmatch. + +Thu Sep 10 23:19:30 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * buffer.c, tar.c: Remove redundant decls of getenv, rindex. + + * Makefile.in: Add uninstall target. + Define libdir instead of hardcoding /etc for installing rmt. + +Thu Sep 10 13:06:03 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * list.c (read_header): On second thought, that doesn't work + either, so just store the names in malloced areas. Sigh. + + * NEWS: New file. + * README: Removed things that belong in NEWS; point to it. 
+ + * list.c (read_header): current_file_name and + current_link_name need to be set to the arrays in head rather + than header; header is the actual read buffer and will change. + + * extract.c (extract_archive): + * buffer.c (new_volume): `#' directives need to start in + column 1. + +Thu Sep 10 06:09:18 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * level-0, level-1 (TAR_PART1): put --atime-preserve inside quotes. + +Wed Sep 9 13:34:26 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * Makefile.in (AUX): Add getpagesize.h. + (AUX): Comment out manuals. + (all): Comment out dependency on tar.info. + + * version.c: Release of version 1.11. + + * level-0, level-1 (TAR_PART1): Use --atime-preserve. + + * Makefile, configure.in: Arrange to use local malloc on HP-UX. + + * port.h Use the canonical Autoconf chunk for alloca instead + of just looking for gcc. + +Wed Sep 9 03:16:58 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * port.h: If compiling with gcc, use __builtin_alloca. + +Tue Sep 8 16:13:41 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * extract.c: Removed long name support from here. + * list.c (read_header): Understand and skip longname/longlink + headers here. Names for current file are stored in new global + variables. All source files except create.c changed to refer + to current_file_name and current_link_name instead of fields + directly from the current header. + +Thu Sep 3 12:41:08 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * create.c (write_long): New function. + (dump_file): When writing link records or symlink records, use + new write_long function instead of mangling when the link + target is too long. + (start_header): Use write_long instead of mangling for long + names. + * extract.c (saverec): Recognize LF_LONGNAME and LF_LONGLINK. + (saverec): Throughout, use longname and longlink if they are set. 
+ +Wed Sep 2 14:41:13 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * mangle.c: This is now deprecated; retain extract_mangle for + backward compatibility. + + * list.c (print_header): patch from Chris Arthur to prevent + printing 0 when the gid or uid is null. + + * list.c (decode_header): patch from Chris Arthur to use the + gid field when the gid is empty, and similarly for uid. + + * extract.c: saved_dir_info, saved_dir_info_head: new type and + var. + (extract_archive): When extracting directories, now save info + in saved_dir_info_head. + (restore_saved_dir_info): New function. + * list.c (read_and): Call restore_saved_dir_info at the end of + the run. + This patch is from Chris Arthur (csa@pennies.sw.stratus.com). + +Mon Aug 31 15:39:55 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * create.c (create_archive): If there are no names specified, + write nothing on the archive instead of dumping ".". + + * buffer.c (open_archive): Useful error message. + + * tar.c, tar.h: Recognize f_atime_preserve. + * create.c (dump_file): Implement f_atime_preserve. + + * rmt.h (_remdev): Don't require /dev/ to be in remote archive + names; obey new force-local flag. + * tar.c, tar.h: Implement new force-local flag. + + * tar.c (describe): same-owner and same-order were confused. + + * create.c (dump_file): Check for toplevel had sense reversed. + + * buffer.c (new_archive): Don't free old_name...when these + come from the command line, they aren't malloced, and it isn't + important to save this trivial amount of memory. + + * tar.h: replace ar_file with ar_files, n_ar_files, + cur_ar_files. + * buffer.c (open_archive): multi-volume compressed archives + never worked; give an appropriate error. Change open of + ar_file to open of ar_files[0]. + (writeerror, readerror, flush_archive): use + ar_files[cur_ar_file] instead of ar_file. + (new_archive): Necessary changes to support ar_files. + * tar.c (options): handle multiple tape drive arguments. 
+ +Fri Aug 28 17:42:13 1992 Michael I Bushnell (mib@wookumz.gnu.ai.mit.edu) + + * list.c (decode_header), create.c (start_header), tar.h (TMAGIC): + Undo djm's changes below; tar does not support the final + Posix.1 format; it's bad to make it look like it does. + +Sun Jul 19 02:13:46 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * port.h: Try to prevent redefining major. + * port.c: HAVE_BZERO -> minix. Fix a typo. + + * list.c (decode_header): Recognize the final POSIX.1 magic as + well as the early draft magic for ustar. + * create.c (start_header): Create a final POSIX.1 magic string + instead of an early draft string for ustar. + * tar.h (TMAGIC): Remove the trailing blanks. + + * rmt.c, rtapelib.c: Use POSIX and STDC headers if available. + * rmt.h: Declare the external functions defined in rtapelib.c. + +Tue Jul 14 00:44:37 1992 David J. MacKenzie (djm@apple-gunkies.gnu.ai.mit.edu) + + * pathmax.h: New file. + * port.h: Include it. + * create.c (create_archive): Allocate PATH_MAX instead of + NAME_MAX for temporary buffer so we don't have to figure out + what NAME_MAX is (portably). + +Fri Jul 10 08:30:42 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * gnu.c (collect_and_sort_names): write_dir_file has no argument. + + * level-0, level-1: Avoid silly Sun awk lossage. + +Mon Jul 6 20:11:32 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * port.c (rename): If unlinking the source at the end fails, + unlink the destination instead to avoid leaving a mess. + +Fri Jul 3 15:16:42 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * buffer.c, diffarch.c, update.c, rtapelib.c: Change NO_MTIO to + HAVE_SYS_MTIO_H. + + * port.c, tar.h: Change FOO_MISSING to HAVE_FOO. + +Tue Jun 23 23:39:02 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) + + * rmt.c: Add #ifdefs to work on ISC. + +Wed May 20 00:12:27 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu) + + * port.h: Define major, minor, makedev if the system doesn't. 
+ +Wed May 13 21:16:38 1992 Michael I Bushnell (mib@apple-gunkies.gnu.ai.mit.edu) + + * gnu.c (add_dir_name): Store legitimate value into + dir_contents when get_dir_contents returns NULL. + +Thu May 7 23:44:35 1992 Michael I Bushnell (mib@apple-gunkies.gnu.ai.mit.edu) + + * gnu.c (add_dir_name): Check for return of NULL from get_dir_contents; + see djm's change of Fri Jul 26 01:12:58 1991. + +Mon May 4 22:50:57 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu) + + * tar.h: Make comments for option names say -- instead of +. + +Thu Apr 30 03:09:16 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu) + + * level-1: Added `$' before VOLNO_FILE in definition of TAR_PART1. + Added line to remove $VOLNO_FILE from any previous dump before + starting. + + * level-0, level-1: Change long options to use `--' instead of `+' + (support for `+' will go away soon) + +Wed Apr 29 14:23:10 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * tar.c, tar.t: Added +volno-file option. + buffer.c: New functions init_volume_number, + closeout_volume_number. + tar.c (main): Call new functions in the right place. + + * buffer.c (fl_write, fl_read): Mod to allow losing tape + drives which use short counts to indicate end of tape + correctly handle the multi-tape stuff. The read half won't + co-exist with f_reblock; there's no way to fix that, of + course. + + * tar.c, tar.h: Added new option +show-omitted-dirs, from + Karl Berry. + list.c (read_and): Implemented show-omitted-dirs. + + * tar.c, tar.h: Added new option +checkpoint. + buffer.c (fl_read, fl_write): Implemented +checkpoint lazily. + + * create.c (dump_file): Added toplevel argument; some devices + can be negative, so the old method was bogus. All callers + changed. + + * tar.c, tar.h: Added new option +ignore-failed-read. + create.c (dump_file): Implemented +ignore-failed-read. + + * create.c (finish_sparse_file): Commented out debugging printf. 
+ + * tar.c, tar.h: Added new option +remove-files to delete files + after they are added to the archive. + create.c (dump_file): Implemented +remove-files for + everything but directories. I don't think they need it. + +Tue Apr 28 13:21:42 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * create.c: (dump_file): save_name needs to be set equal to p, + not something inside the header, because the header changes at + the first buffer flush. + +Fri Apr 24 10:41:13 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * create.c: Djm incorrectly moved the include of port.h to + precede the include of sys/file.h; restored. + + * tar.c (main): Cases CMD_EXTRACT and CMD_LIST: declare error + string with const. + + * gnu.c (collect_and_sort_names): Leave if around + write_dir_file in place. + +Wed Apr 22 02:16:14 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu) + + * rtapelib.c: SIGTYPE -> RETSIGTYPE. + +Mon Mar 9 22:42:05 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * rtapelib.c: Reformat and make comments more complete. + Rename a few variables for clarity. + +Thu Mar 5 14:07:34 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu) + + * tar.c (describe): Document long options as starting with --. + +Thu Jan 23 22:54:41 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * tar.c (options): Check get_date return value for error indication. + +Tue Dec 24 00:03:03 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * tar.c, gnu.c, extract.c, create.c, port.h, rmt.h: Change + POSIX ifdefs to HAVE_UNISTD_H and _POSIX_VERSION. + +Fri Dec 20 13:50:38 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * testpad.c (main): flush stderr so perror and fprintf + cooperate right. + +Wed Dec 18 16:52:42 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * port.h: Check MAJOR_IN_MKDEV and MAJOR_IN_SYSMACROS to find + where to get major, minor and makedev. 
+ * create.c, list.c, update.c: Don't check USG to include + sys/sysmacros.h. + +Thu Dec 12 21:57:10 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * mangle.c (extract_mangle): Correctly null terminate name of + link target. + +Thu Nov 21 07:44:18 1991 Michael I Bushnell (mib at nutrimat) + + * create.c (dump_file, at start of ISREG output loop): use + filename from header instead of real name to make sure that we + get the mangled version and not one that is too long and + overflows buffers. + +Sat Nov 16 01:37:45 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * tar.h: Use new criteria for STDC version of msg. + +Sat Nov 2 21:31:57 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * create.c, gnu.c, tar.c: Use DIRENT instead of NDIR to select + between dirent.h and ndir.h for USG. + + * port.c: Rename WANT_FOO to FOO_MISSING to make sharing code + and configure script with other utilities easier. Use + VPRINTF_MISSING and DOPRNT_MISSING instead of FOO_MSG to + select error reporting routines. + +Thu Oct 17 20:19:02 1991 Michael I Bushnell (mib at churchy.gnu.ai.mit.edu) + + * level-0: Repair damage from previous mod: stdin to rsh must + be the terminal or tar's questions lose. + +Sat Aug 31 15:05:27 1991 Noah Friedman (friedman at nutrimat.gnu.ai.mit.edu) + + * level-0: Fixed several syntax errors associated with + stdout/stderr redirection. + Made sure remote host executes commands from sh where redirection + is necessary, since root's shell might be csh in some places and + the redirect syntax differs. + +Thu Aug 29 00:54:01 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * tar.c (long_options). Fixed info-script long option. + +Mon Aug 26 16:53:50 1991 David J. MacKenzie (djm at pogo.gnu.ai.mit.edu) + + * configure, Makefile.in: Only put $< in Makefiles if VPATH + is being used, because older makes don't understand it. + +Mon Aug 19 01:47:57 1991 David J. 
MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * create.c: Indent '#pragma alloca' so non-ANSI compilers + don't choke on it. + +Wed Aug 14 14:10:43 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu) + + * list.c (UGSWIDTH): Increase from 11 (sort of like Unix tar) to + 18, so that with normal user and group names of <= 8 chars, + the columns never shift in a tar -t listing. + +Fri Aug 2 00:41:08 1991 David J. MacKenzie (djm at apple-gunkies) + + * Makefile.in (dist): Include texinfo.tex and tar.info*. + (install): Install tar.info*. + * configure: Set INSTALLDATA. + + * configure: Create config.status. Remove it and Makefile if + interrupted while creating them. + + * configure: Check for +srcdir etc. arg and look for + Makefile.in in that directory. Set VPATH if srcdir is not `.'. + * Makefile.in: Add `prefix'. + (tar.info): New target. + +Tue Jul 30 17:08:04 1991 David J. MacKenzie (djm at apple-gunkies) + + * configure: NEED_TZSET has become FTIME_MISSING. + +Mon Jul 29 19:23:10 1991 David J. MacKenzie (djm at wombat.gnu.ai.mit.edu) + + * port.c [F_CHSIZE]: Additional version. + +Sat Jul 27 22:27:47 1991 David J. MacKenzie (djm at wombat.gnu.ai.mit.edu) + + * rmt.h: Clean up ifdefs. + + * makefile.pc: Fix typo. + port.h: Change MSDOS to __MSDOS__. + [__MSDOS__]: Define off_t. Include io.h and not sys/param.h. + [__TURBOC__]: Use void * and don't define const. + +Fri Jul 26 01:12:58 1991 David J. MacKenzie (djm at bleen) + + * buffer.c: Rename `eof' to `hit_eof' to avoid conflict with an + MSDOS function. + * gnu.c (get_dir_contents): Return NULL, not "\0\0\0\0", on error. + * diffarch.c (diff_archive): Open files in binary mode. + Don't use or free a non-malloc'd return value from get_dir_contents. + * msd_dir.c [__TURBOC__]: Include stdlib.h. + * rmt.h: lseek returns off_t, not long. + + * tar.c (describe): -X is +exclude-from, not +exclude. + (names_notfound): Free memory only if amiga, not !unix. 
+ + * tar.h, tar.c: Add +null option to make -T read + null-terminated filenames (such as those produced by GNU find + -print0), and disable -C option. + This guarantees that odd filenames will get archived. + * tar.c (read_name_from_file): New function. + (name_next): Call it instead of fgets. + +Wed Jul 24 11:17:48 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * create.c [_AIX]: Declare alloca. + + * buffer.c (open_archive): Check for successful open before, + not after, fstatting the fd. + +Tue Jul 23 20:51:31 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * configure: Only define BSD42 if sys/file.h exists. + If alloca is missing and /usr/ucblib exists (SVR4), use it + instead of -lPW. + + * port.h [!__STDC__]: #define const. + * gnu.c (dirent_cmp): Fix args to agree with ANSI C prototype. + * create.c: Declare ck_realloc. + * gnu.c, diffarch.c: Move check for symlinks to after port.h include. + +Sat Jul 20 00:03:54 1991 David J. MacKenzie (djm at apple-gunkies) + + * msd_dir.[ch]: Use POSIX-style `struct dirent' instead of + `struct direct'. + * create.c, gnu.c, tar.c: Adjust callers. + +Thu Jul 18 00:05:01 1991 David J. MacKenzie (djm at bleen) + + * port.c (ck_malloc, ck_realloc): Return PTR, not char *. + * gnu.c, create.c, tar.c: Fix decls. + + * port.c: Don't use the preprocessor to guess missing + functions on Unix; let configure do it. + [WANT_GETWD] (getwd): Function removed; not needed because + getcwd is used if needed. + * gnu.c, tar.c: Use getcwd if POSIX. + + * rtapelib.c: Use SIGTYPE instead of testing SIGNAL_VOID. + Default to void (more common these days) instead of int. + + * tar.c, gnu.c, mangle.c: Remove VOIDSTAR defn. Use PTR instead. + * port.h: Define PTR. + + * gnu.c, tar.c [__MSDOS__ || USG]: Remove incorrect getcwd + decl; put correct one in port.h [!POSIX]. 
+ + * tar.c (describe): Print on stdout instead of stderr; it's + not so much a usage message (since you have to ask for it + explicitly) as on-line help, and you really need to be able to + page it because it's more than a screen long. + + * Make #ifdefs for sys/file.h or fcntl.h, directory header, + sys/mtio.h consistent between files. Use NO_MTIO instead of + tricks with USG and HAVE_MTIO and NO_RMTIOCTL. + * Move decls of ANSI C and POSIX functions to port.h and + use standard headers to declare them if available + [STDC_HEADERS or POSIX]. + * Add many missing function declarations and return types. + * Some places used __MSDOS__, some MSDOS; standardize on __MSDOS__. + * Change S_IF macros to S_IS for POSIX. + * port.h: Define appropriate S_IS macros if missing. + * port.h: Rename macros for testing exit status to conform to + POSIX; use the system's versions if available [POSIX]. + * Use POSIX PATH_MAX and NAME_MAX instead of MAXPATHLEN and MAXNAMLEN. + * port.h: Define PATH_MAX and NAME_MAX. + * create.c, gnu.c, tar.c: Use ck_malloc and free instead of + auto arrays of size PATH_MAX or NAME_MAX, since with pathconf + they might not be constants. + * Move all definitions of O_* to port.h to reduce redundancy. + * Make all source files that now need to include port.h do so. + * port.c: Remove #undefs of WANT_* so you can use -DWANT_* + when compiling, instead of having to edit port.c. + [WANT_DUMB_GET_DATE] (get_date): Function removed. + Even systems without bison can get bison output and compile it. + [WANT_STRING] (index, rindex, bcopy, bzero, bcmp): Functions + removed; the translation is now done by macros in port.h. + * wildmat.c (wildmat): Use POSIX.2 '!' instead of '^' to negate + character classes. + +Mon Jul 15 13:47:45 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * testpad.c (main): Return type void. + + * port.c [WANT_STRING]: Don't include memory.h if NO_MEMORY_H. + + * create.c (dump_file) [AIX]: Fix typo, `allocate' for `alloca'. 
+ * gnu.c (collect_and_sort_names): Move misplaced brace out of #ifdef. + From: Minh Tran-Le <TRANLE@intellicorp.com>. + + * configure: Also look in sys/signal.h for signal decl. + +Wed Jul 10 01:42:55 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * Rename rtape_server.c to rmt.c and rtape_lib.c to rtapelib.c. + + * configure, Makefile.in: $(INSTALLPROG) -> $(INSTALL). + +Tue Jul 9 01:38:37 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu) + + * Most files: Refer to GPL version 2. + * COPYING: Use version 2. + + * port.c [__TURBOC__] (utime): New function. + + * xmalloc: New function (just calls ck_malloc), for alloca.c + and bison.simple (in getdate.y output). + + * Makefile.in (AUX): Include alloca.c and tcexparg.c, a + command line globber for Turbo C. + +Mon Jul 8 14:30:52 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu) + + * testpad.c: Open and write to testpad.h instead of stdout, + because some MS-DOS makes (Borland's at least) can't do + redirection in commands. + * Makefile.in: Don't redirect testpad output. + +Mon Jul 8 12:56:35 1991 Michael I Bushnell (mib at churchy.gnu.ai.mit.edu) + + * buffer.c (fl_read): Missing \n in printf. + +Mon Jul 8 03:40:28 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu) + + * create.c, extract.c, gnu.c, diffarch.c, tar.c: Comment out + unused variables. + + * tar.c (options): Cast get_date arg to VOIDSTAR instead of + `struct timeb *', since on some non-BSD systems the latter is + undefined. + +Sat Jul 6 04:53:14 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu) + + * Replace Makefile with configure, Makefile.in, and makefile.pc. + Update README with current compilation instructions. + + * port.c [WANT_RENAME] (rename): New function. + +Wed Jul 3 18:10:52 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * testpad.c (main): Avoid warning from some compilers on array + address. + + * rtape_server.c (sys_errlist): Should be declared extern. 
+ +Mon Jul 1 14:14:06 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * Release of version 1.10; appropriate changes to README. + + * create.c: Removed printf's about sparse files. + + * Fix a misplaced quote in level-0 and change some >& into + 2>&1. + +Fri Jun 21 23:04:31 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * list.c (skip_extended_headers): Userec was being called in + the wrong place. + +Thu Jun 20 19:10:35 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu) + + * tar.h: Use ANSI prototypes for msg and msg_perror if + STDC_MSG is defined, even if BSD42 is also. + + * Makefile: Replace DESTDIR with bindir. + (install): Don't install tar.texinfo. There's no standard + place for texinfo files, and /usr/local/man is inappropriate. + Add TAGS, distclean, and realclean targets and SHELL= line. + + * version.c: Move old change history to bottom of ChangeLog. + +Wed Jun 12 12:43:58 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * rtape_lib.c (__rmt_write): #ifdef should reference + SIGNAL_VOID, not USG. + +Wed Jun 5 14:57:11 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * tar.c (name_match, addname): Ugly hack to handle -C without + any files specified. + tar.h (struct name): New field for ugly hack. + +Mon Jun 3 14:46:46 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu) + + * testpad.c: New file to determine if we need special padding + in struct header in tar.h. + + * tar.h (struct header): include padding if necessary, include + testpad.h. + + * Makefile: rules to create testpad.h, etc. + +Wed May 22 16:02:35 1991 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu) + + * tar.c (options): -L takes an argument. + + * rtape_lib.c (__rmt_open): add /usr/bin/nsh to the list of + remote shell programs. + + * create.c: define MAXPATHLEN if we don't get it from a system + header file. + + * create.c (deal_with_sparse): return a real return value if + we can't open the file. 
+ + * tar.c (long_options): +newer takes an argument. + (describe): fix printing in various trivial ways + +Tue May 21 17:15:19 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * tar.c (long_options): +get and +concatenate don't require arguments + +Mon May 20 15:55:30 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * create.c (write_eot): Don't try and write an EOF if we are + already at one. + + * port.c (strstr): Looking for null string should return zero. + +Sun May 19 22:30:10 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * tar.c (options): -l doesn't take an argument + + * Makefile: minor fix for SGI 4D defines from torda@scum.ethz.ch + + * rtape_server.c (main.c): Suggested mod for 386/AIX from + Minh Tran-Le. I'm suspicious about this one. + + * create.c (dump_file): Mods from Minh Tran-Le for hidden + files on AIX. + gnu.c (collect_and_sort_name, get_dir_contents): AIX hidden file mod. + + * tar.c: (name_next): Mod from David Taylor to allow -C inside + a file list given to -T. + + * Makefile: Comment describing presence of USE_REXEC. + + * extract.c (extract_archive, case LF_SPARSE): zero check for + last element on numbytes needs to look at value after + converted from octal. + + * port.c: Don't always demand strstr, check for HAVE_STRSTR + instead. + Makefile: Comment describing presence of HAVE_STRSTR option. + +Sun May 19 18:39:48 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu) + + * port.c (get_date): Renamed from getdate, to avoid SVR4 conflict. + * tar.c: Call get_date instead of getdate. + +Fri May 10 02:58:17 1991 Noah Friedman (friedman at nutrimat) + + * tar.c: added "\n\" to the end of some documentation strings + where they were left off. + +Thu May 9 17:28:54 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * Makefile: added level-0, level-1, and backup-specs to AUX. + * version.c: changed to 1.10 beta. + * README: updated for 1.10 beta release. 
+ +Tue Apr 2 12:04:54 1991 Michael I Bushnell (mib at godwin) + + * create.c (dump_file): HPUX's st_blocks is in 1024 byte units + instead of 512 like the rest of the world, so I special cased + it. + * tar.c: Undo Noah's changes. + +Mon Apr 1 17:49:28 1991 Noah Friedman (friedman at wookumz.gnu.ai.mit.edu) + + (This ought to be temporary until things are fixed properly. ) + + * tar.c: (struct option long_options): flag for "sparse" zero if + compiling under hpux. + tar.c: (function options): case 'S' is a no-op if compiling under + hpux. + +Sat Mar 30 12:20:41 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * tar.h: new variable tape_length. + + * tar.c (options): add new option +tape-length / -L. + + * buffer.c (fl_write): Turn #ifdef TEST code for limited tape + length on always, for tape-length option. + + * create.c (dump_file): avoid apollo lossage where S_IFIFO == S_IFSOCK. + + * buffer.c: include regex.h + * buffer.c (fl_read, open_archive): Use regex routines for + volume header match. + * xmalloc.c: removed file; wasn't necessary. + * tar.c: (main) use ck_malloc instead of xmalloc. + +Thu Mar 28 04:05:05 1991 Noah Friedman (friedman at goldman) + + * regex.c, regex.o: New links. + * tar.c: include regex.h. + * Makefile (OBJ2): Add regex.o. + (regex.o, tar.o): Depend on regex.h + (SRC2, AUX): Add the new files. + +Sat Mar 23 15:39:42 1991 Noah Friedman (friedman at wookumz.gnu.ai.mit.edu) + + * Makefile: added default flags and options for compiling under + hpux. + + * Added files alloca.c and xmalloc.c + +Sat Mar 23 14:35:31 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu) + + * port.c: Define WANT_VALLOC in HPUX. + +Fri Mar 15 06:20:15 1991 David J. MacKenzie (djm at geech.ai.mit.edu) + + * rtape_lib.c: If USG and not HAVE_MTIO, define NO_RMTIOCTL + automatically. + (_rmt_rexec): Temporarily re-open stdin and stdout to + /dev/tty, to guarantee that rexec() can prompt and read the + login name and password from the user. 
+ From pascal@cnam.cnam.fr (Pascal Meheut). + * Makefile: Mention -DUSE_REXEC. + +Fri Mar 8 20:15:11 1991 Michael I Bushnell (mib at wookumz.ai.mit.edu) + + * tar.h, Makefile: Makefile CPP macro HAVE_SIZE_T might be + useful for some people. + + * gnu.c: lstat->stat define where appropriate + + * buffer.c (fl_write): keep track of amount written for +totals. + * tar.c, tar.h: set flag f_totals from +totals option + * tar.h (f_totals, tot_written): new variables + * tar.c (main): print total written with CMD_CREATE + + * tar.c (main): return appropriate exit status + +Thu Jan 17 00:50:21 1991 David J. MacKenzie (djm at apple-gunkies) + + * port.c: Remove a spurious `+' between functions (a remnant + of a context diff, apparently). + +Wed Jan 9 19:43:59 1991 Michael I Bushnell (mib at pogo.ai.mit.edu) + + * create.c (where_is_data): Rewritten to be better, and then + #ifdef-ed out. + (deal_with_sparse): Severely pruned. Now we write or don't + write only complete blocks, not worrying about partial blocks. + This simplifies calculations, removes bugs, and elides the + second scan through the block. The first was zero_record, the + second was where_is_data. + +Mon Jan 7 17:13:29 1991 Michael I Bushnell (mib at wookumz.ai.mit.edu) + + * create.c (deal_with_sparse): Second computation (for short + reads) of numbytes increment had subtraction backwards. + Need to handle calling where_is_data better when we did a + short read (it might go past the end of the read), also, set + sparsearray[...].offset in this case too. + +Fri Jan 4 12:24:38 EST 1991 Jay Fenlason (hack@ai.mit.edu) + + * buffer.c Return a special error code if the archive you're + trying to read starts with a different label than the one specified + on the command line. + +Wed Jan 2 12:05:21 EST 1991 Jay Fenlason (hack@ai.mit.edu) + + * gnu.c Prepend the current directory to the gnu_dumpfile, so that + -C's won't affect where the output goes. (sigh.) 
+ +Tue Dec 18 18:05:59 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * (gnu.c) Don't complain if the gnudumpfile we're reading info + from doesn't exist. + + * create.c Write out gnudumpfile after finishing writing the archive. + + * tar.c Add +exclude FNAME, and make +exclude-from do what +exclude + used to. + + Make +version an operation, not an option. + + add +confirmation alias for +interactive. + +Tue Dec 4 13:28:08 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c (check_exclude) Don't let MUMBLE match MUMBLE.c or fooMUMBLE + but only foo/MUMBLE + + * Add the name mangler (mangle.c, plus changes to create.c and + extract.c) + + * extract.c Three small patches from Chip Salzenberg + (tct!chip@uunet.uu.net) + + Don't complain when extracting a link, IFF it already exists. + + Don't complain when extracting a directory IFF it already + exists. + + Don't add u+wx to directories when running as root. + + * gnu.c Some changes from Chip Salzenberg to make + +listed-incremental work. + + * port.c Add the F_FREESP emulation of the ftruncate syscall. + +Wed Nov 21 15:57:07 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + Remove excess \n from lots of msg() calls. + +Mon Nov 19 14:09:43 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c Rename +volume to +label + +Fri Nov 16 15:43:44 1990 David J. MacKenzie (djm at apple-gunkies) + + * tar.c (describe): Include the default values for -b and -f + (as set in the Makefile) in the message. + +Thu Nov 15 13:36:45 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * extract.c (extract_archive) Do the utime() call before the + chmod() call, 'cuz some versions of utime() trash the file's mode + bits. + + * list.c (read_and) Call do_something on volume headers and + multivol files even if they don't match the names we're looking for, + etc. . . + +Tue Nov 6 13:51:46 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * port.c (un-quote-string) Don't try to write a null + if there's already one there. 
+ +Thu Nov 1 14:58:57 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * buffer.c (new_volume) fflush(msg_file) before reading for + confirmation on new volume. On EOF or error, print error msg and + abort. + +Mon Oct 29 12:06:35 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * getdate.y Use new version of getdate(). + + * tar.c (name_add) Use sizeof(char *) instead of sizeof(int) + + * README give the correct return address. + +Thu Oct 25 16:03:58 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + rtape_lib.c Change RMTIOCTL to NO_RMTIOCTL, so it is on by default. + + rmt.h Add _isrmt() #define for NO_REMOTE case. + + gnu.c Add forward reference for add_dir_name(). + +Tue Oct 16 11:04:52 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + 1.09 New -G file implementation of gnu-dump stuff. + + * tar.c (name_add) Get the calls to ck_realloc and ck_malloc right. + +Thu Oct 11 11:23:38 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * gnu.c Fix a couple of typos. + +Wed Sep 19 13:35:03 1990 David J. MacKenzie (djm at apple-gunkies) + + * getdate.y [USG] (ftime): Use `daylight' unless + DAYLIGHT_MISSING is defined. + +Mon Sep 17 18:04:21 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * gnu.c (gnu_restore) Don't use a passed char* for the + file name, use skipcrud+head->header.name, just like everything + else does. This means that gnu_restore will still work with + small buffers, etc. + +Thu Sep 13 15:01:17 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c (add_exclude) Don't bus-error if the exclude file doesn't + end with a newline. + +Sun Sep 9 22:35:27 1990 David J. MacKenzie (djm at albert.ai.mit.edu) + + * Makefile (dist): Remove .fname when done. + +Thu Sep 6 12:48:58 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * gnu.c (gnu_restore) Remember to skip_file() over the directory + contents, even if we don't have to do anything with them. + + * create.c extract.c diffarch.c Free sparsearray after we're done + with it. 
+ +Tue Sep 4 10:18:50 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * Makefile Include gnu.c in dist + + * gnu.c move add_dir above read_dir_file so that cc doesn't complain + about add_dir returning void. + +Sun Sep 2 20:46:34 1990 David J. MacKenzie (djm at apple-gunkies) + + * getdate.y: Declare some more functions and add storage + classes where omitted to shut compiler up. + [USG] (ftime): Don't use extern var `daylight'; appears that + some systems don't have it. + +Wed Aug 29 00:05:06 1990 David J. MacKenzie (djm at apple-gunkies) + + * getdate.y (lookup): In the code that allows `Aug.' to be + recognized as `Aug', don't chop off the final `.' from words + like `a.m.', so they can be recognized. + +Thu Aug 16 11:34:07 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * buffer.c (open_archive) If -O, write verbosity to stderr + instead of stdout. + +Fri Aug 10 12:29:28 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * getdate.y Handle an explicit DST in the input string. + A dozen line patch from Per Foreby (perf@efd.lth.se). + +Mon Jul 16 13:05:11 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c rename -g -G +incremental, +listed-incremental, etc. + +Fri Jul 13 14:10:33 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c Make +newer and +newer-mtime work according to their names. + + * gnu.c If +newer or +newer-mtime, use the time specified on the + command line. + + * buffer.c, create.c Add test to see if dimwit is trying to + archive the archive. + + * tar.c (long_options[]) re-ordered, so that groups of similar + options are next to each other. . . I think. + + (describe) Modified to more closely reflect reality. + +Fri Jul 6 13:13:59 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * tar.c add compile-time option for SYS V (?) style + tape-drive names /dev/rmt/{n}[lmh] + + * tar.c Fix getopt-style stuff so that -C always works correctly. + + * gnu.c, tar.c make filename to -G optional. + + * {all over}, replace some fprintf(stderr...) calls with calls + to msg(). 
+ + * port.c Make -Dmumble_MSG option on command line override + internal assumptions. + + * Makefile Mention -Dmumble_MSG options + +Fri Jul 6 02:35:31 1990 David J. MacKenzie (djm at apple-gunkies) + + * tar.c (options): Don't change `c' if it is 0, as getopt now + handles that internally. + +Mon Jul 2 15:21:13 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * gnu.c (new file) Moved all the f_gnudump stuff here where we + can keep track of it easier. Also made -G take a file name where it + stores the inode information about directories so that we can + detect moved directories. + + * create.c (dump_file) Changed slightly to work with the new + f_gnudump. + + * tar.c Moved the f_gnudump stuff to gnu.c + + * tar.c, extract.c added the +do-chown option, which forces tar + to always try to chown the created files to their original owners. + + * version.c New version 1.09 + +Sun Jun 24 14:26:28 1990 David J. MacKenzie (djm at albert.ai.mit.edu) + + * create.c: Change ifdefs for directory library header + selection to be like the ones in tar.c. + * Makefile [Xenix]: Link with -ldir to get the dirent.h + directory library. + +Thu Jun 7 03:31:51 1990 David J. MacKenzie (djm at albert.ai.mit.edu) + + * Makefile, buffer.c, diffarch.c: Change MTIO symbol to HAVE_MTIO + because SCO Xenix defines 'MTIO' for an incompatible tape driver + system in a file included by termio.h. + * tar.h: Don't define size_t for Xenix. + +Tue Jun 5 11:38:00 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * create.c (dump_file) Only print the + "... is on a different filesystem..." if f_verbose is on. + also add a case for S_IFSOCK and treat it like a FIFO. + (Not sure if that's the right thing to do or not, but it's better + than all those Unknown File Type msgs.) + +Thu May 31 19:25:36 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * port.c Use #ifdef sparc instead of #ifdef SPARC since + the lowercase version is defined, and the uppercase one isn't. 
+ +Tue May 22 11:49:18 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * port.c (ck_malloc) if size==0 pretend size=1 + (ck_realloc) if(!ptr) call ck_malloc instead. + +Tue May 15 12:05:45 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * diffarch.c (diff_archive) If not f_absolute_paths, and an attempt to + open a file listed in the archive fails, try /filename also. This will + allow diff to open the wrong file if both /filename and filename exist, + but there's nothing we can do about that. + +Fri May 11 16:17:43 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * Makefile, Describe new -DMTIO option. + + * buffer.c diffarch.c Change ifdefs slightly, so that + -DMTIO will include sys/mtio.h even if USG is defined. + This is for HUPX and similar BSD/USG crossovers. + +Tue May 8 13:14:54 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + * update.c (update_archive) Call reset_eof() when appropriate. + + * buffer.c (reset_eof) New function, that turns off the EOF flag, and + re-sets the ar_record and ar_last pointers. This will allow + 'tar rf non-existant-file' to not core-dump. + +Fri May 4 14:05:31 1990 David J. MacKenzie (djm at albert.ai.mit.edu) + + * tar.c: Recognize the +sparse option. It was documented, but + only the short form (-S) was actually recognized. + +Tue Apr 17 21:34:14 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * create.c Don't access location 0 if ->dir_contents is null. + +Wed Apr 11 17:30:03 EDT 1990 Jay Fenlason (hack@ai.mit.edu) + + * buffer.c (flush_archive, close_archive, new_volume) Always check + the return value of rmtclose(), and only give a warning msg if it is + <0. Some device drivers (including Sun floppy disk, and HP + streaming tape) return -1 after an IO error (or something like that.) + +Fri Mar 23 00:06:30 1990 Jim Kingdon (kingdon at mole.ai.mit.edu) + + * tar.c (long_options): Make it so +append +extract +list +update + +catenate and +delete don't take arguments. 
+ +Mon Mar 12 13:33:53 EST 1990 + + * buffer.c (open_archive, fl_write) Set the mtime of the volume + header to the current time. + +Wed Mar 7 14:10:10 EST 1990 Jay Fenlason (hack@ai.mit.edu) + + * buffer.c Fix +compress-block A two character patch from + Juha Sarlin (juha@tds.kth.se) + Replace #ifdef __GNU__ with #ifdef __STDC__ + (new_volume) If open of new archive fails, ask again + (Is probably user error.) + + * tar.c Replace #ifdef __GNU__ with #ifdef __STDC__ + + * port.c Clean up #ifdef and #defines a bit. + (quote_copy_string) Sometimes the malloc'd buffer + would be up to two characters too short. + + * extract.c (extract_archive) Don't declare ind static. + + * create.c (dump_file) Don't declare index_offset static. + + * diffarch.c Remove diff_name variable, and always use + head->header.name, which will always work, unlike diff_name, which + becomes trash when the next block is read in. + +Thu Mar 1 13:43:30 EST 1990 Jay Fenlason (hack@wookumz.ai.mit.edu) + + * Makefile Mention the -NO_REMOTE option. + * port.c Fix typo, and define WANT_FTRUNCATE on i386 machines. + +Mon Feb 26 17:44:53 1990 Jim Kingdon (kingdon at pogo.ai.mit.edu) + + * getdate.y: Declare yylex and yyerror as static. + #define yyparse to getdate_yyparse. + +Sun Feb 25 20:47:23 1990 David J. MacKenzie (djm at albert.ai.mit.edu) + + * tar.c: Remove +old option, since it is a valid abbreviation of + +old-archive, which does the same thing. + (describe): A few small cleanups in message. + +Mon Feb 5 14:29:21 EST 1990 Jay Fenlason (hack@wookumz) + + * port.c define LOSING_MSG on sparc, since doprnt_msg doesn't work. + Fix typo in #ifdef WANT_GETWD + +Fri Jan 26 16:11:20 EST 1990 Jay Fenlason (hack@wookumz) + + 1.08 Sparse file support added. Also various other features. + + * diffarch.c (compare_chunk) Include correct arguments in + a call to fprintf() for an error msg. + (compare_chunks, compare_dir) First argument is a long, not an int. 
+ + * tar.c (options) Use tar variable (argv[0]) as the name to print + in an error msg, instead of a constant "tar". + (confirm) Use external variable char TTY_NAME[] for name of file + to open for confirmation input. + + * buffer.c (new_volume) Ditto. + + * port.c Add declaration for TTY_NAME[]. + + * rmt.h Add long declarations for lseek() and __rmt_lseek(); + +Tue Jan 23 14:06:21 EST 1990 Jay Fenlason (hack@wookumz) + * tar.c, create.c Create the +newer-mtime option, which is like + +newer, but only looks for files whose mtime is newer than the + given date. + + * rtape_lib.c Make *both* instances of signal-handler stuff use + void (*foo)() on USG systems. + +Thu Jan 11 14:03:45 EST 1990 Jay Fenlason (hack@wookumz) + + * getdate.y Parse European dates of the form YYMMDD. + In ftime() Init timezone by calling localtime(), and remember that + timezone is in seconds, but we want timeb->timezone to be in minutes. + This small patch from Joergen Haegg (jh@aahas.se) + + * rtape_lib.c (__rmt_open) Also look for /usr/bsd/rsh. + Declare signal handler as returning void instead of int if USG is + defined. + + * port.c Declare WANT_GETWD for SGI 4-D IRIS. + + * Makefile Include defines for SGI 4D version. There are a simple + patch from Mike Muuss (mike@brl.mil). + + * buffer.c (fl_read) Work properly on broken Ultrix systems where + read() returns -1 with errno==ENOSPC on end of tape. Correctly go + on to the next volume if f_multivol. + + * list.c (list_archive,print_header) Flush msg_file after printing + messages. + + * port.c Delete unused references to alloca(). + Don't crash if malloc() returns zero in quote_copy_string. + Flush stderr in msg() and msg_perror(). + + * tar.c Flush msg_file after printing confirmation msg. + +Wed Jan 10 01:58:46 1990 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * tar.c (main): Change -help option and references to it to +help, + and remove suggestion to run info (which is unreleased, so not + likely to be of any help). 
+ +Tue Jan 9 16:16:00 EST 1990 Jay Fenlason (hack @wookumz) + + * create.c (dump_file) Close file descriptor if start_header() + fails. + (dump_file) Change test for ./ ness to not think that + .{any character} is a ./ These are both trivial changes from + Piercarlo "Peter" Grandi pcg%cs.aber.ac.uk@nsfnet-relay.ac.uk + + * diffarch.c (diff_init) Print correct number of bytes in error + message. + +Tue Jan 9 03:19:49 1990 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * Makefile: Add comment at top noting that two source files also + contain #defines that might need to be changed by hand. + + * create.c, diffarch.c, extract.c: Change L_SET to 0 in lseek + calls, because only BSD defines it. + * create.c (dump_file): Make sparse file checking code conditional + on BSD42 because it uses st_blocks, which the other systems lack. + +Tue Jan 2 13:35:56 EST 1990 Jay Fenlason (hack@gnu) + + * port.c (quote_copy_string) Fix so it doesn't scramble memory if + the last character is non-printable. A trivial fix from Kian-Tat Lim + (ktl@wag240.caltech.edu). + +Tue Dec 19 11:19:37 1989 Jim Kingdon (kingdon at pogo) + + * port.c [BSD42]: Define DOPRNT_MSG. + tar.h [BSD42]: Do not prototype msg{,_perror}. + +Fri Dec 8 11:02:47 EST 1989 Jay Fenlason (hack@gnu) + + * create.c (dump_file) Remove typo in msg. + +Fri Dec 1 19:26:47 1989 David J. MacKenzie (djm at trix) + + * Makefile: Remove comments referring to certain systems lacking + getopt, since it is now provided always and needed by all systems. + + * port.c: Remove copy of getopt.c, as it is now linked in + separately to always get the current version. + + * tar.c: Rename +cat-tars option to +catenate or +concatenate, + and +local-filesystem to +one-file-system (preferred by rms + and used in GNU cp for the same purpose). + (describe): Reflect changes. + +Tue Nov 28 04:28:26 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * port.c: Move declaration of alloca into #else /* sparc */ + so it will compile on sparcs. 
+ +Mon Nov 27 15:17:08 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * tar.c (options): Remove -version option (replaced by +version). + (describe): Mention long options. + +Sat Nov 25 04:25:23 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * getoldopt.c (getoldopt): Make `opt_index' argument a pointer to + an int, not char. + + * tar.c: Modify long options per rms's suggestions: + Make preserve-permissions an alias for same-permissions. + Make preserve-order an alias for same-order. + Define preserve to mean both of those combined. + Make old an alias for old-archive. + Make portability an alias for old-archive, also. + Rename sym-links to dereference. + Rename gnudump to incremental. + Rename filename to file. + Make compare an alias for diff. Leave diff but prefer compare. + Rename blocking-factor to block-size. + Rename chdir to directory. + Make uncompress an alias for compress. + Rename confirm to interactive. + Make get an alias for extract. + Rename volume-header to volume. + + Also make +version an alias for -version. + + (options): Shorten code that interprets long options by using + the equivalent short options' code. This also makes it tons + easier to change the long options. + + (describe): Make usage message more internally consistent + stylistically. + +Mon Nov 20 14:55:39 EST 1989 hack@ai.mit.edu + + * list.c (read_and) Call check_exclude() to see if the files + should be skipped on extract or list. + +Thu Nov 9 18:59:32 1989 Jim Kingdon (kingdon at hobbes.ai.mit.edu) + + * buffer.c (fl_read): Fix typos in error message + "tar EOF not on block boundary". + +Mon Oct 23 13:09:40 EDT 1989 (hack@ai.mit.edu) + + * tar.c (long_options[]) Add an option for blocked compression. + +Thu Oct 19 13:38:16 EDT 1989 (hack@ai.mit.edu) + + * buffer.c (writeerror) Print a more useful error msg. + +Wed Sep 27 18:33:41 EDT 1989 (hack@ai.mit.edu) + + * tar.c (main) Mention "tar -help" if the luser types a non-workable + set of options. 
+ +Mon Sep 11 15:03:29 EDT 1989 (hack@ai.mit.edu) + + * tar.c (options) Have -F correctly set info_script. + +Tue Aug 29 12:58:06 EDT 1989 (hack@ai.mit.edu) + + * Makefile Include ChangeLog in tar.tar and tar.tar.Z + +Mon Aug 28 17:42:24 EDT 1989 (hack@ai.mit.edu) + + * tar.c (options) Made -F imply -M + Also remind tar that the -f option takes an argument! + + * Modified -F option to make it do what (I think) it + should. e.g, if you say -F, tar won't send a msg to + msg_file and wait for a <return> It'll just run the program + it was given, and when the prog returns, the new tape had + *better* be ready. . . + + * buffer.c (open_archive) Give error message and abort if + the luser didn't give an archive name. + +Fri Aug 25 20:05:27 EDT 1989 Joy Kendall (jak at hobbes) + + * Added code to make a new option to run a specified script + at the end of each tape in a multi-volume backup. Changed: + tar.c: made new switch, -F, and new long-named option, + "info-script". Code is where you would expect. + tar.h: added flag f_run_script_at_end, and an extern char * + called info_script, which optarg gets set to. + buffer.c: line 1158 in new_volume(): if f_run_script_at_end + is set, we give info_script to system(), otherwise we do + what we've always done. **FIXME** I'm not sure if that's all + that has to be done here. + +Thu Aug 24 10:09:38 EDT 1989 Joy Kendall (jak at spiff) +(These changes made over the course of 6/89 - 8/89) + + * diffarch.c: diff_archive: Added switches for LF_SPARSE in the + case statements that needed it. Also, skip any extended headers + if we need to when we skip over a file. (need to change + the bit about, if the size doesn't agree AND the file is NOT + sparse, then there's a discrepancy, because I added another + field to the header which should be able to deal with the + sizes) If the file is sparse, call the added routine + "diff_sparse_files" to compare. Also added routine + "fill_in_sparse_array". 
+ + * extract.c: extract_archive: added the switch LF_SPARSE + to the case statement as needed, and code to treat the + sparse file. At label "again_file", modified opening the + file to see if we should have O_APPEND be one of the modes. + Added code at label "extract_file" to call the new routine + "extract_sparse_file" when we have an LF_SPARSE flag. + + Note: really should erase the commented-out code in there, + because it's confusing. + + * update.c: made sure that if a file needed to be "skipped" + over, it would check to see if the linkflag was sparse, and + if so, would then make sure to skip over any "extended + headers" that might come after the header itself. Do so by + calling "skip_extended_headers". + + * create.c: create_archive: added code to detect a sparse + file when in the long case statement. Added ways to detect + extended headers, and label "extend" (ack! should get rid of + that, is atrocious). Call the new routine "finish_sparse_file" + if the linkflag is LF_SPARSE to write the info to the tape. + Also added routines "init_sparsearray", "deal_with_sparse", + "clear_buffer", "where_is_data", "zero_record", and + "find_new_file_size". + + * tar.h: Added the #define's SPARSE_EXT_HDR and + SPARSE_IN_HDR. Added the struct sparse and the struct + sp_array. Added the linkflag LF_SPARSE. Changed the tar + header in several ways: + - added an array of struct sparse's SPARSE_IN_HDR long + - added a char flag isextended + - added a char string realsize to store the true + size of a sparse file + Added another choice to the union record called a + struct extended_header, which is an array of 21 struct + sparse's and a char isextended flag. Added flag + f_sparse_file to list of flags. + + * tar.c: added long-named options to make tar compatible with + getopt_long, changed Makefile. + +... ... .. ..:..:.. ... .... Jay Fenlason (hack@ai.mit.edu) + + 1.07 New version to go on beta tape with GCC 1.35 + Better USG support. 
Also support for __builtin_alloca + if we're compiling with GCC. + diffarch.c: Include the correct header files so MTIOCTOP + is defined. + tar.c: Don't print the verbose list of options unless + given -help. The list of options is *way* too long. + + 1.06 Use STDC_MSG if __STDC__ defined + ENXIO means end-of-volume in archive (for the UNIX PC) + Added break after volume-header case (line 440) extract.c + Added patch from arnold@unix.cc.emory.edu to rtape_lib.c + Added f_absolute_paths option. + Deleted references to UN*X manual sections (dump(8), etc) + Fixed to not core-dump on illegal options + Modified msg_perror to call perror("") instead of perror(0) + patch so -X - works + Fixed tar.c so 'tar cf - -C dir' doesn't core-dump + tar.c (name_match): Fixed to chdir() to the appropriate + directory if the matching name's change_dir is set. This + makes tar xv -C foo {files} work. + + 1.05 A fix to make confirm() work when the archive is on stdin + include 'extern FILE *msg_file;' in pr_mkdir(), and fix + tar.h to work with __STDC__ + + Added to port.c: mkdir() ftruncate() Removed: lstat() + Fixed -G to work with -X + Another fix to tar.texinfo + Changed tar.c to say argv[0]":you must specify exactly ... + buffer.c: modified child_open() to keep tar from hanging when + it is done reading/writing a compressed archive + added fflush(msg_file) before printing error messages + create.c: fixed to make link_names non-absolute + + 1.04 Added functions msg() and msg_perror() Modified all the + files to call them. Also checked that all (I hope) + calls to msg_perror() have a valid errno value + (modified anno() to leave errno alone), etc + Re-fixed the -X option. This time for sure. . . + re-modified the msg stuff. 
flushed anno() completely + Modified the directory stuff so it should work on sysV boxes + added ftime() to getdate.y + Fixed un_quote_string() so it won't wedge on \" Also fixed + \ddd (like \123, etc) + More fixes to tar.texinfo + + 1.03 Fixed buffer.c so 'tar tzf NON_EXISTENT_FILE' returns an error + message instead of hanging forever + More fixes to tar.texinfo + + 1.02 Fixed tar.c so 'tar -h' and 'tar -v' don't cause core dump + Also fixed the 'usage' message to be more up-to-date. + Fixed diffarch.c so verify should compile without MTIOCTOP + defined + + 1.01 Fixed typoes in tar.texinfo + Fixed a bug in the #define for rmtcreat() + Fixed the -X option to not call realloc() of 0. + + Version 1.00: version.c added. -version option added + Installed new version of the remote-tape library + Added -help option + +Local Variables: +mode: indented-text +left-margin: 8 +version-control: never +End: diff --git a/gnu/usr.bin/tar/Makefile b/gnu/usr.bin/tar/Makefile new file mode 100644 index 0000000..810fe3b --- /dev/null +++ b/gnu/usr.bin/tar/Makefile @@ -0,0 +1,14 @@ +PROG= tar +SRCS= buffer.c create.c diffarch.c extract.c fnmatch.c getdate.y \ + getoldopt.c getopt.c getopt1.c gnu.c list.c mangle.c names.c port.c \ + regex.c rtapelib.c tar.c update.c version.c +CFLAGS+= -DRETSIGTYPE=void -DDIRENT=1 -DHAVE_SYS_MTIO_H=1 -DHAVE_UNISTD_H=1 +CFLAGS+= -DHAVE_GETGRGID=1 -DHAVE_GETPWUID=1 -DHAVE_STRING_H=1 +CFLAGS+= -DHAVE_LIMITS_H=1 -DHAVE_STRSTR=1 -DHAVE_VALLOC=1 -DHAVE_MKDIR=1 +CFLAGS+= -DHAVE_MKNOD=1 -DHAVE_RENAME=1 -DHAVE_FTRUNCATE=1 -DHAVE_GETCWD=1 +CFLAGS+= -DHAVE_VPRINTF=1 -DNEEDPAD -I${.CURDIR} +CFLAGS+= -DDEF_AR_FILE=\"/dev/rst0\" -DDEFBLOCKING=20 +NOMAN=noman + +.include <bsd.prog.mk> +.include "../../usr.bin/Makefile.inc" diff --git a/gnu/usr.bin/tar/Makefile.gnu b/gnu/usr.bin/tar/Makefile.gnu new file mode 100644 index 0000000..a03617a --- /dev/null +++ b/gnu/usr.bin/tar/Makefile.gnu @@ -0,0 +1,185 @@ +# Generated automatically from Makefile.in by configure. 
+# Un*x Makefile for GNU tar program. +# Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +#### Start of system configuration section. #### + +srcdir = . +VPATH = . + +# If you use gcc, you should either run the fixincludes script that +# comes with it or else use gcc with the -traditional option. Otherwise +# ioctl calls will be compiled incorrectly on some systems. +CC = gcc +YACC = bison -y +INSTALL = /usr/local/bin/install -c +INSTALL_PROGRAM = $(INSTALL) +INSTALL_DATA = $(INSTALL) -m 644 + +# Things you might add to DEFS: +# -DSTDC_HEADERS If you have ANSI C headers and libraries. +# -DHAVE_UNISTD_H If you have unistd.h. +# -DHAVE_STRING_H If you don't have ANSI C headers but have string.h. +# -DHAVE_LIMITS_H If you have limits.h. +# -DBSD42 If you have sys/dir.h (unless you use -DPOSIX), +# sys/file.h, and st_blocks in `struct stat'. +# -DDIRENT If you have dirent.h. +# -DSYSNDIR Old Xenix systems (sys/ndir.h). +# -DSYSDIR Old BSD systems (sys/dir.h). +# -DNDIR Old System V systems (ndir.h). +# -DMAJOR_IN_MKDEV If major, minor, makedev defined in sys/mkdev.h. +# -DMAJOR_IN_SYSMACROS If major, minor, makedev defined in sys/sysmacros.h. +# -DRETSIGTYPE=int If your signal handlers return int, not void. +# -DHAVE_SYS_MTIO_H If you have sys/mtio.h (magtape ioctls). 
+# -DHAVE_SYS_GENTAPE_H If you have sys/gentape.h (ISC magtape ioctls). +# -DHAVE_NETDB_H To use rexec for remote tape operations +# instead of forking rsh or remsh. +# -DNO_REMOTE If you have neither a remote shell nor rexec. +# -DHAVE_VPRINTF If you have vprintf function. +# -DHAVE_DOPRNT If you have _doprnt function (but lack vprintf). +# -DHAVE_FTIME If you have ftime system call. +# -DHAVE_STRSTR If you have strstr function. +# -DHAVE_VALLOC If you have valloc function. +# -DHAVE_MKDIR If you have mkdir and rmdir system calls. +# -DHAVE_MKNOD If you have mknod system call. +# -DHAVE_RENAME If you have rename system call. +# -DHAVE_GETCWD If not POSIX.1 but have getcwd function. +# -DHAVE_FTRUNCATE If you have ftruncate system call. +# -DV7 On Version 7 Unix (not tested in a long time). +# -DEMUL_OPEN3 If you lack a 3-argument version of open, and want +# to emulate it with system calls you do have. +# -DNO_OPEN3 If you lack the 3-argument open and want to +# disable the tar -k option instead of emulating open. +# -DXENIX If you have sys/inode.h and need it to be included. + +DEF_AR_FILE = /dev/rst0 +DEFBLOCKING = 20 +DEFS = -DRETSIGTYPE=void -DDIRENT=1 -DHAVE_SYS_MTIO_H=1 -DHAVE_UNISTD_H=1 -DHAVE_GETGRGID=1 -DHAVE_GETPWUID=1 -DHAVE_STRING_H=1 -DHAVE_LIMITS_H=1 -DHAVE_STRSTR=1 -DHAVE_VALLOC=1 -DHAVE_MKDIR=1 -DHAVE_MKNOD=1 -DHAVE_RENAME=1 -DHAVE_FTRUNCATE=1 -DHAVE_GETCWD=1 -DHAVE_VPRINTF=1 -DDEF_AR_FILE=\"$(DEF_AR_FILE)\" -DDEFBLOCKING=$(DEFBLOCKING) + +# Set this to rtapelib.o unless you defined NO_REMOTE, in which case +# make it empty. +RTAPELIB = rtapelib.o +LIBS = + +CFLAGS = -g +LDFLAGS = -g + +prefix = /usr/bin +exec_prefix = $(prefix) + +# Prefix for each installed program, normally empty or `g'. +binprefix = + +# The directory to install tar in. +bindir = $(exec_prefix)/bin + +# Where to put the rmt executable. +libdir = /sbin + +# The directory to install the info files in. +infodir = $(prefix)/info + +#### End of system configuration section. 
#### + +SHELL = /bin/sh + +SRC1 = tar.c create.c extract.c buffer.c getoldopt.c update.c gnu.c mangle.c +SRC2 = version.c list.c names.c diffarch.c port.c fnmatch.c getopt.c malloc.c +SRC3 = getopt1.c regex.c getdate.y getdate.c alloca.c +SRCS = $(SRC1) $(SRC2) $(SRC3) +OBJ1 = tar.o create.o extract.o buffer.o getoldopt.o update.o gnu.o mangle.o +OBJ2 = version.o list.o names.o diffarch.o port.o fnmatch.o getopt.o +OBJ3 = getopt1.o regex.o getdate.o $(RTAPELIB) +OBJS = $(OBJ1) $(OBJ2) $(OBJ3) +AUX = README INSTALL NEWS COPYING ChangeLog Makefile.in makefile.pc \ + configure configure.in \ + tar.h fnmatch.h pathmax.h port.h open3.h getopt.h regex.h \ + rmt.h rmt.c rtapelib.c \ + msd_dir.h msd_dir.c tcexparg.c \ + level-0 level-1 backup-specs dump-remind getpagesize.h +# tar.texinfo tar.info* texinfo.tex \ + +all: tar rmt +# tar.info + +.c.o: + $(CC) -c $(CFLAGS) $(CPPFLAGS) $(DEFS) -I$(srcdir) -I. $< + +tar: $(OBJS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + +rmt: rmt.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(srcdir)/rmt.c $(LIBS) + +tar.info: tar.texinfo + makeinfo $(srcdir)/tar.texinfo + +install: all + $(INSTALL_PROGRAM) tar $(bindir)/$(binprefix)tar + -test ! -f rmt || $(INSTALL_PROGRAM) rmt $(libdir)/rmt +# for file in $(srcdir)/tar.info*; \ +# do $(INSTALL_DATA) $$file $(infodir)/$$file; \ +# done + +uninstall: + rm -f $(bindir)/$(binprefix)tar $(infodir)/tar.info* + -rm -f $(libdir)/rmt + +$(OBJS): tar.h pathmax.h port.h +regex.o buffer.o tar.o: regex.h +tar.o fnmatch.o: fnmatch.h + +getdate.c: getdate.y + $(YACC) $(srcdir)/getdate.y + mv y.tab.c getdate.c +# getdate.y has 8 shift/reduce conflicts. 
+ +TAGS: $(SRCS) + etags $(SRCS) + +clean: + rm -f *.o tar rmt core +mostlyclean: clean + +distclean: clean + rm -f Makefile config.status + +realclean: distclean + rm -f TAGS *.info* getdate.c y.tab.c + +shar: $(SRCS) $(AUX) + shar $(SRCS) $(AUX) | gzip > tar-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q version.c`.shar.z + +dist: $(SRCS) $(AUX) + echo tar-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q version.c` > .fname + -rm -rf `cat .fname` + mkdir `cat .fname` + for file in $(SRCS) $(AUX); do \ + ln $$file `cat .fname` || cp $$file `cat .fname`; done + tar chzf `cat .fname`.tar.z `cat .fname` + -rm -rf `cat .fname` .fname + +tar.zoo: $(SRCS) $(AUX) + -rm -rf tmp.dir + -mkdir tmp.dir + -rm tar.zoo + for X in $(SRCS) $(AUX) ; do echo $$X ; sed 's/$$/
/' $$X > tmp.dir/$$X ; done + cd tmp.dir ; zoo aM ../tar.zoo * + -rm -rf tmp.dir + +# Prevent GNU make v3 from overflowing arg limit on SysV. +.NOEXPORT: diff --git a/gnu/usr.bin/tar/README b/gnu/usr.bin/tar/README new file mode 100644 index 0000000..4b577e7 --- /dev/null +++ b/gnu/usr.bin/tar/README @@ -0,0 +1,40 @@ +Hey! Emacs! Yo! This is -*- Text -*- !!! + +This is GNU tar 1.11.2. Please send bug reports, etc., to +bug-gnu-utils@prep.ai.mit.edu. This is a beta-test release. Please +try it out. There is no manual; the release of version 1.12 will +contain a manual. + +GNU tar is based heavily on John Gilmore's public domain tar, but with +added features. The manual is currently being written. + +This distribution also includes rmt, the remote tape server (which +normally must reside in /etc). The mt tape drive control program is +in the GNU cpio distribution. + +See the file INSTALL for compilation and installation instructions for Unix. +See the file NEWS for information on all that is new in this version +of tar. + +makefile.pc is a makefile for Turbo C 2.0 on MS-DOS. + +Various people have been having problems using floppies on a NeXT. In +order to have them work right, you need to kill the automounting +program which tries to mount floppies as soon as they are added. + +If you want to do incremental dumps, use the distributed backup +scripts. They are what we use at the FSF to do all our backups. Most +importantly, do not use --incremental (-G) or --after-date (-N) or +--newer-mtime to do incremental dumps. The only option that works +correctly for this purpose is --listed-incremental. (When extracting +incremental dumps, use --incremental (-G).) + +If your system needs to link with -lPW to get alloca, but has +rename in the C library (so HAVE_RENAME is defined), -lPW might +give you an incorrect version of rename. On HP-UX this manifests +itself as an undefined data symbol called "Error" when linking cp, ln, +and mv. 
If this happens, use `ar x' to extract alloca.o from libPW.a +and `ar rc' to put it in a library liballoca.a, and put that in LIBS +instead of -lPW. This problem does not occur when using gcc, which +has alloca built in. + diff --git a/gnu/usr.bin/tar/buffer.c b/gnu/usr.bin/tar/buffer.c new file mode 100644 index 0000000..e0ffc2d --- /dev/null +++ b/gnu/usr.bin/tar/buffer.c @@ -0,0 +1,1584 @@ +/* Buffer management for tar. + Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Buffer management for tar. + * + * Written by John Gilmore, ihnp4!hoptoad!gnu, on 25 August 1985. + */ + +#include <stdio.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif +#include <sys/types.h> /* For non-Berkeley systems */ +#include <signal.h> +#include <time.h> +time_t time (); + +#ifdef HAVE_SYS_MTIO_H +#include <sys/ioctl.h> +#include <sys/mtio.h> +#endif + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#ifdef __MSDOS__ +#include <process.h> +#endif + +#ifdef XENIX +#include <sys/inode.h> +#endif + +#include "tar.h" +#include "port.h" +#include "rmt.h" +#include "regex.h" + +/* Either stdout or stderr: The thing we write messages (standard msgs, not + errors) to. 
Stdout unless we're writing a pipe, in which case stderr */ +FILE *msg_file = stdout; + +#define STDIN 0 /* Standard input file descriptor */ +#define STDOUT 1 /* Standard output file descriptor */ + +#define PREAD 0 /* Read file descriptor from pipe() */ +#define PWRITE 1 /* Write file descriptor from pipe() */ + +#define MAGIC_STAT 105 /* Magic status returned by child, if + it can't exec. We hope compress/sh + never return this status! */ + +void *valloc (); + +void writeerror (); +void readerror (); + +void ck_pipe (); +void ck_close (); + +int backspace_output (); +extern void finish_header (); +void flush_archive (); +int isfile (); +int new_volume (); +void verify_volume (); +extern void to_oct (); + +#ifndef __MSDOS__ +/* Obnoxious test to see if dimwit is trying to dump the archive */ +dev_t ar_dev; +ino_t ar_ino; +#endif + +/* + * The record pointed to by save_rec should not be overlaid + * when reading in a new tape block. Copy it to record_save_area first, and + * change the pointer in *save_rec to point to record_save_area. + * Saved_recno records the record number at the time of the save. + * This is used by annofile() to print the record number of a file's + * header record. + */ +static union record **save_rec; +union record record_save_area; +static long saved_recno; + +/* + * PID of child program, if f_compress or remote archive access. + */ +static int childpid = 0; + +/* + * Record number of the start of this block of records + */ +long baserec; + +/* + * Error recovery stuff + */ +static int r_error_count; + +/* + * Have we hit EOF yet? 
+ */ +static int hit_eof; + +/* Checkpointing counter */ +static int checkpoint; + +/* JF we're reading, but we just read the last record and its time to update */ +extern time_to_start_writing; +int file_to_switch_to = -1; /* If remote update, close archive, and use + this descriptor to write to */ + +static int volno = 1; /* JF which volume of a multi-volume tape + we're on */ +static int global_volno = 1; /* Volume number to print in external messages. */ + +char *save_name = 0; /* Name of the file we are currently writing */ +long save_totsize; /* total size of file we are writing. Only + valid if save_name is non_zero */ +long save_sizeleft; /* Where we are in the file we are writing. + Only valid if save_name is non-zero */ + +int write_archive_to_stdout; + +/* Used by fl_read and fl_write to store the real info about saved names */ +static char real_s_name[NAMSIZ]; +static long real_s_totsize; +static long real_s_sizeleft; + +/* Reset the EOF flag (if set), and re-set ar_record, etc */ + +void +reset_eof () +{ + if (hit_eof) + { + hit_eof = 0; + ar_record = ar_block; + ar_last = ar_block + blocking; + ar_reading = 0; + } +} + +/* + * Return the location of the next available input or output record. + * Return NULL for EOF. Once we have returned NULL, we just keep returning + * it, to avoid accidentally going on to the next file on the "tape". + */ +union record * +findrec () +{ + if (ar_record == ar_last) + { + if (hit_eof) + return (union record *) NULL; /* EOF */ + flush_archive (); + if (ar_record == ar_last) + { + hit_eof++; + return (union record *) NULL; /* EOF */ + } + } + return ar_record; +} + + +/* + * Indicate that we have used all records up thru the argument. + * (should the arg have an off-by-1? XXX FIXME) + */ +void +userec (rec) + union record *rec; +{ + while (rec >= ar_record) + ar_record++; + /* + * Do NOT flush the archive here. 
If we do, the same + * argument to userec() could mean the next record (if the + * input block is exactly one record long), which is not what + * is intended. + */ + if (ar_record > ar_last) + abort (); +} + + +/* + * Return a pointer to the end of the current records buffer. + * All the space between findrec() and endofrecs() is available + * for filling with data, or taking data from. + */ +union record * +endofrecs () +{ + return ar_last; +} + + +/* + * Duplicate a file descriptor into a certain slot. + * Equivalent to BSD "dup2" with error reporting. + */ +void +dupto (from, to, msg) + int from, to; + char *msg; +{ + int err; + + if (from != to) + { + err = close (to); + if (err < 0 && errno != EBADF) + { + msg_perror ("Cannot close descriptor %d", to); + exit (EX_SYSTEM); + } + err = dup (from); + if (err != to) + { + msg_perror ("cannot dup %s", msg); + exit (EX_SYSTEM); + } + ck_close (from); + } +} + +#ifdef __MSDOS__ +void +child_open () +{ + fprintf (stderr, "MS-DOS %s can't use compressed or remote archives\n", tar); + exit (EX_ARGSBAD); +} + +#else +void +child_open () +{ + int pipe[2]; + int err = 0; + + int kidpipe[2]; + int kidchildpid; + +#define READ 0 +#define WRITE 1 + + ck_pipe (pipe); + + childpid = fork (); + if (childpid < 0) + { + msg_perror ("cannot fork"); + exit (EX_SYSTEM); + } + if (childpid > 0) + { + /* We're the parent. 
/*
 * Set up the process pipeline used for compressed archives.
 *
 * The parent returns with `archive' set to its end of a pipe.  The
 * forked child connects the other end of that pipe to its stdin or
 * stdout and execs the compression program.  When the archive is
 * stdin/stdout, remote, or not a plain file, the child forks again and
 * the extra process shuttles whole blocks between the archive and the
 * compression program.
 */
void
child_open ()
{
  int pipe[2];			/* parent <-> compression program */
  int err = 0;

  int kidpipe[2];		/* compression program <-> blocking process */
  int kidchildpid;

#define READ	0
#define WRITE	1

  ck_pipe (pipe);

  childpid = fork ();
  if (childpid < 0)
    {
      msg_perror ("cannot fork");
      exit (EX_SYSTEM);
    }
  if (childpid > 0)
    {
      /* We're the parent.  Clean up and be happy */
      /* This, at least, is easy */

      if (ar_reading)
	{
	  f_reblock++;
	  archive = pipe[READ];
	  ck_close (pipe[WRITE]);
	}
      else
	{
	  archive = pipe[WRITE];
	  ck_close (pipe[READ]);
	}
      return;
    }

  /* We're the kid */
  if (ar_reading)
    {
      dupto (pipe[WRITE], STDOUT, "(child) pipe to stdout");
      ck_close (pipe[READ]);
    }
  else
    {
      dupto (pipe[READ], STDIN, "(child) pipe to stdin");
      ck_close (pipe[WRITE]);
    }

  /* We need a child tar only if
     1: we're reading/writing stdin/out (to force reblocking)
     2: the file is to be accessed by rmt (compress doesn't know how)
     3: the file is not a plain file */
#ifdef NO_REMOTE
  if (!(ar_files[0][0] == '-' && ar_files[0][1] == '\0') && isfile (ar_files[0]))
#else
  if (!(ar_files[0][0] == '-' && ar_files[0][1] == '\0') && !_remdev (ar_files[0]) && isfile (ar_files[0]))
#endif
    {
      /* We don't need a child tar.  Open the archive */
      if (ar_reading)
	{
	  archive = open (ar_files[0], O_RDONLY | O_BINARY, 0666);
	  if (archive < 0)
	    {
	      msg_perror ("can't open archive %s", ar_files[0]);
	      exit (EX_BADARCH);
	    }
	  dupto (archive, STDIN, "archive to stdin");
	  /* close(archive); */
	}
      else
	{
	  archive = creat (ar_files[0], 0666);
	  if (archive < 0)
	    {
	      msg_perror ("can't open archive %s", ar_files[0]);
	      exit (EX_BADARCH);
	    }
	  dupto (archive, STDOUT, "archive to stdout");
	  /* close(archive); */
	}
    }
  else
    {
      /* We need a child tar */
      ck_pipe (kidpipe);

      kidchildpid = fork ();
      if (kidchildpid < 0)
	{
	  msg_perror ("child can't fork");
	  exit (EX_SYSTEM);
	}

      if (kidchildpid > 0)
	{
	  /* About to exec compress:  set up the files */
	  if (ar_reading)
	    {
	      dupto (kidpipe[READ], STDIN, "((child)) pipe to stdin");
	      ck_close (kidpipe[WRITE]);
	      /* dup2(pipe[WRITE],STDOUT); */
	    }
	  else
	    {
	      /* dup2(pipe[READ],STDIN); */
	      dupto (kidpipe[WRITE], STDOUT, "((child)) pipe to stdout");
	      ck_close (kidpipe[READ]);
	    }
	  /* ck_close(pipe[READ]); */
	  /* ck_close(pipe[WRITE]); */
	  /* ck_close(kidpipe[READ]);
	     ck_close(kidpipe[WRITE]); */
	}
      else
	{
	  /* Grandchild.  Do the right thing, namely sit here and
	     read/write the archive, and feed stuff back to compress */
	  tar = "tar (child)";
	  if (ar_reading)
	    {
	      dupto (kidpipe[WRITE], STDOUT, "[child] pipe to stdout");
	      ck_close (kidpipe[READ]);
	    }
	  else
	    {
	      dupto (kidpipe[READ], STDIN, "[child] pipe to stdin");
	      ck_close (kidpipe[WRITE]);
	    }

	  if (ar_files[0][0] == '-' && ar_files[0][1] == '\0')
	    {
	      if (ar_reading)
		archive = STDIN;
	      else
		archive = STDOUT;
	    }
	  else			/* This can't happen if (ar_reading==2)
		archive = rmtopen(ar_files[0], O_RDWR|O_CREAT|O_BINARY, 0666);
	  	else */ if (ar_reading)
	    archive = rmtopen (ar_files[0], O_RDONLY | O_BINARY, 0666);
	  else
	    archive = rmtcreat (ar_files[0], 0666);

	  if (archive < 0)
	    {
	      msg_perror ("can't open archive %s", ar_files[0]);
	      exit (EX_BADARCH);
	    }

	  if (ar_reading)
	    {
	      /* Archive -> compression program: read one block at a
		 time and pass it on in pieces of at most RECORDSIZE. */
	      for (;;)
		{
		  char *ptr;
		  int max, count;

		  r_error_count = 0;
		error_loop:
		  err = rmtread (archive, ar_block->charptr, (int) (blocksize));
		  if (err < 0)
		    {
		      /* readerror() returns only if a retry is allowed. */
		      readerror ();
		      goto error_loop;
		    }
		  if (err == 0)
		    break;
		  ptr = ar_block->charptr;
		  max = err;
		  while (max)
		    {
		      count = (max < RECORDSIZE) ? max : RECORDSIZE;
		      err = write (STDOUT, ptr, count);
		      if (err != count)
			{
			  if (err < 0)
			    {
			      msg_perror ("can't write to compression program");
			      exit (EX_SYSTEM);
			    }
			  else
			    msg ("write to compression program short %d bytes",
				 count - err);
			  /* Advance only by what was actually written. */
			  count = (err < 0) ? 0 : err;
			}
		      ptr += count;
		      max -= count;
		    }
		}
	    }
	  else
	    {
	      /* Compression program -> archive: collect a full block
		 from stdin, then write it out in one piece. */
	      for (;;)
		{
		  int n;
		  char *ptr;

		  n = blocksize;
		  ptr = ar_block->charptr;
		  while (n)
		    {
		      err = read (STDIN, ptr, (n < RECORDSIZE) ? n : RECORDSIZE);
		      if (err <= 0)
			break;
		      n -= err;
		      ptr += err;
		    }
		  /* EOF */
		  if (err == 0)
		    {
		      /* Final partial block: either write it short, or
			 zero-fill it to full size when f_compress_block
			 is set.  blocksize is restored afterwards. */
		      if (!f_compress_block)
			blocksize -= n;
		      else
			bzero (ar_block->charptr + blocksize - n, n);
		      err = rmtwrite (archive, ar_block->charptr, blocksize);
		      if (err != (blocksize))
			writeerror (err);
		      if (!f_compress_block)
			blocksize += n;
		      break;
		    }
		  /* Here err < 0 means the read failed part-way. */
		  if (n)
		    {
		      msg_perror ("can't read from compression program");
		      exit (EX_SYSTEM);
		    }
		  err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
		  if (err != blocksize)
		    writeerror (err);
		}
	    }

	  /* close_archive(); */
	  exit (0);
	}
    }
  /* So we should exec compress (-d) */
  if (ar_reading)
    execlp (f_compressprog, f_compressprog, "-d", (char *) 0);
  else
    execlp (f_compressprog, f_compressprog, (char *) 0);
  /* Only reached when the exec itself failed. */
  msg_perror ("can't exec %s", f_compressprog);
  _exit (EX_SYSTEM);
}
/*
 * Return 1 if P names a regular file, or if stat() on P fails;
 * return 0 for anything else that exists (a directory, a device, ...).
 */
int
isfile (p)
     char *p;
{
  struct stat info;

  return (stat (p, &info) < 0 || S_ISREG (info.st_mode)) ? 1 : 0;
}
stderr : stdout; + + if (blocksize == 0) + { + msg ("invalid value for blocksize"); + exit (EX_ARGSBAD); + } + + if (n_ar_files == 0) + { + msg ("No archive name given, what should I do?"); + exit (EX_BADARCH); + } + + /*NOSTRICT*/ + if (f_multivol) + { + ar_block = (union record *) valloc ((unsigned) (blocksize + (2 * RECORDSIZE))); + if (ar_block) + ar_block += 2; + } + else + ar_block = (union record *) valloc ((unsigned) blocksize); + if (!ar_block) + { + msg ("could not allocate memory for blocking factor %d", + blocking); + exit (EX_ARGSBAD); + } + + ar_record = ar_block; + ar_last = ar_block + blocking; + ar_reading = reading; + + if (f_multivol && f_verify) + { + msg ("cannot verify multi-volume archives"); + exit (EX_ARGSBAD); + } + + if (f_compressprog) + { + if (reading == 2 || f_verify) + { + msg ("cannot update or verify compressed archives"); + exit (EX_ARGSBAD); + } + if (f_multivol) + { + msg ("cannot use multi-volume compressed archives"); + exit (EX_ARGSBAD); + } + child_open (); + if (!reading && ar_files[0][0] == '-' && ar_files[0][1] == '\0') + msg_file = stderr; + /* child_open(rem_host, rem_file); */ + } + else if (ar_files[0][0] == '-' && ar_files[0][1] == '\0') + { + f_reblock++; /* Could be a pipe, be safe */ + if (f_verify) + { + msg ("can't verify stdin/stdout archive"); + exit (EX_ARGSBAD); + } + if (reading == 2) + { + archive = STDIN; + msg_file = stderr; + write_archive_to_stdout++; + } + else if (reading) + archive = STDIN; + else + { + archive = STDOUT; + msg_file = stderr; + } + } + else if (reading == 2 || f_verify) + { + archive = rmtopen (ar_files[0], O_RDWR | O_CREAT | O_BINARY, 0666); + } + else if (reading) + { + archive = rmtopen (ar_files[0], O_RDONLY | O_BINARY, 0666); + } + else + { + archive = rmtcreat (ar_files[0], 0666); + } + if (archive < 0) + { + msg_perror ("can't open %s", ar_files[0]); + exit (EX_BADARCH); + } +#ifndef __MSDOS__ + if (!_isrmt (archive)) + { + struct stat tmp_stat; + + fstat (archive, &tmp_stat); 
+ if (S_ISREG (tmp_stat.st_mode)) + { + ar_dev = tmp_stat.st_dev; + ar_ino = tmp_stat.st_ino; + } + } +#endif + +#ifdef __MSDOS__ + setmode (archive, O_BINARY); +#endif + + if (reading) + { + ar_last = ar_block; /* Set up for 1st block = # 0 */ + (void) findrec (); /* Read it in, check for EOF */ + + if (f_volhdr) + { + union record *head; +#if 0 + char *ptr; + + if (f_multivol) + { + ptr = malloc (strlen (f_volhdr) + 20); + sprintf (ptr, "%s Volume %d", f_volhdr, 1); + } + else + ptr = f_volhdr; +#endif + head = findrec (); + if (!head) + { + msg ("Archive not labelled to match %s", f_volhdr); + exit (EX_BADVOL); + } + if (re_match (label_pattern, head->header.arch_name, + strlen (head->header.arch_name), 0, 0) < 0) + { + msg ("Volume mismatch! %s!=%s", f_volhdr, + head->header.arch_name); + exit (EX_BADVOL); + } +#if 0 + if (strcmp (ptr, head->header.name)) + { + msg ("Volume mismatch! %s!=%s", ptr, head->header.name); + exit (EX_BADVOL); + } + if (ptr != f_volhdr) + free (ptr); +#endif + } + } + else if (f_volhdr) + { + bzero ((void *) ar_block, RECORDSIZE); + if (f_multivol) + sprintf (ar_block->header.arch_name, "%s Volume 1", f_volhdr); + else + strcpy (ar_block->header.arch_name, f_volhdr); + current_file_name = ar_block->header.arch_name; + ar_block->header.linkflag = LF_VOLHDR; + to_oct (time (0), 1 + 12, ar_block->header.mtime); + finish_header (ar_block); + /* ar_record++; */ + } +} + + +/* + * Remember a union record * as pointing to something that we + * need to keep when reading onward in the file. Only one such + * thing can be remembered at once, and it only works when reading + * an archive. + * + * We calculate "offset" then add it because some compilers end up + * adding (baserec+ar_record), doing a 9-bit shift of baserec, then + * subtracting ar_block from that, shifting it back, losing the top 9 bits. 
+ */ +void +saverec (pointer) + union record **pointer; +{ + long offset; + + save_rec = pointer; + offset = ar_record - ar_block; + saved_recno = baserec + offset; +} + +/* + * Perform a write to flush the buffer. + */ + +/*send_buffer_to_file(); + if(new_volume) { + deal_with_new_volume_stuff(); + send_buffer_to_file(); + } + */ + +void +fl_write () +{ + int err; + int copy_back; + static long bytes_written = 0; + + if (f_checkpoint && !(++checkpoint % 10)) + msg ("Write checkpoint %d\n", checkpoint); + if (tape_length && bytes_written >= tape_length * 1024) + { + errno = ENOSPC; + err = 0; + } + else + err = rmtwrite (archive, ar_block->charptr, (int) blocksize); + if (err != blocksize && !f_multivol) + writeerror (err); + else if (f_totals) + tot_written += blocksize; + + if (err > 0) + bytes_written += err; + if (err == blocksize) + { + if (f_multivol) + { + if (!save_name) + { + real_s_name[0] = '\0'; + real_s_totsize = 0; + real_s_sizeleft = 0; + return; + } +#ifdef __MSDOS__ + if (save_name[1] == ':') + save_name += 2; +#endif + while (*save_name == '/') + save_name++; + + strcpy (real_s_name, save_name); + real_s_totsize = save_totsize; + real_s_sizeleft = save_sizeleft; + } + return; + } + + /* We're multivol Panic if we didn't get the right kind of response */ + /* ENXIO is for the UNIX PC */ + if (err < 0 && errno != ENOSPC && errno != EIO && errno != ENXIO) + writeerror (err); + + /* If error indicates a short write, we just move to the next tape. 
/*
 * Perform a write to flush the buffer.
 *
 * Writes one full block to the archive.  For single-volume archives a
 * short or failed write is fatal.  For multi-volume archives a short
 * write (or reaching tape_length) switches to the next volume: the
 * block is re-sent with a volume header and/or a continuation header
 * placed in front, and the records pushed off the end of the block
 * are copied back to the start of the buffer for the next write.
 */

/*send_buffer_to_file();
  if(new_volume) {
  	deal_with_new_volume_stuff();
	send_buffer_to_file();
  }
 */

void
fl_write ()
{
  int err;
  int copy_back;		/* records displaced by inserted headers */
  static long bytes_written = 0;	/* bytes written to the current volume */

  if (f_checkpoint && !(++checkpoint % 10))
    msg ("Write checkpoint %d\n", checkpoint);
  if (tape_length && bytes_written >= tape_length * 1024)
    {
      /* Pretend the medium is full once the user-set length is hit. */
      errno = ENOSPC;
      err = 0;
    }
  else
    err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
  if (err != blocksize && !f_multivol)
    writeerror (err);
  else if (f_totals)
    tot_written += blocksize;

  if (err > 0)
    bytes_written += err;
  if (err == blocksize)
    {
      if (f_multivol)
	{
	  /* Remember which file this block belonged to, in case the
	     next write has to continue it on a new volume. */
	  if (!save_name)
	    {
	      real_s_name[0] = '\0';
	      real_s_totsize = 0;
	      real_s_sizeleft = 0;
	      return;
	    }
#ifdef __MSDOS__
	  if (save_name[1] == ':')
	    save_name += 2;
#endif
	  while (*save_name == '/')
	    save_name++;

	  /* NOTE(review): unbounded copy into the fixed-size
	     real_s_name buffer -- confirm save_name is length-limited
	     by the callers. */
	  strcpy (real_s_name, save_name);
	  real_s_totsize = save_totsize;
	  real_s_sizeleft = save_sizeleft;
	}
      return;
    }

  /* We're multivol  Panic if we didn't get the right kind of response */
  /* ENXIO is for the UNIX PC */
  if (err < 0 && errno != ENOSPC && errno != EIO && errno != ENXIO)
    writeerror (err);

  /* If error indicates a short write, we just move to the next tape. */

  if (new_volume (0) < 0)
    return;
  bytes_written = 0;
  /* Back ar_block up by one record per header that must precede the
     data on the new volume. */
  if (f_volhdr && real_s_name[0])
    {
      copy_back = 2;
      ar_block -= 2;
    }
  else if (f_volhdr || real_s_name[0])
    {
      copy_back = 1;
      ar_block--;
    }
  else
    copy_back = 0;
  if (f_volhdr)
    {
      bzero ((void *) ar_block, RECORDSIZE);
      sprintf (ar_block->header.arch_name, "%s Volume %d", f_volhdr, volno);
      to_oct (time (0), 1 + 12, ar_block->header.mtime);
      ar_block->header.linkflag = LF_VOLHDR;
      finish_header (ar_block);
    }
  if (real_s_name[0])
    {
      int tmp;

      /* The continuation header goes after the volume header, if any. */
      if (f_volhdr)
	ar_block++;
      bzero ((void *) ar_block, RECORDSIZE);
      strcpy (ar_block->header.arch_name, real_s_name);
      ar_block->header.linkflag = LF_MULTIVOL;
      to_oct ((long) real_s_sizeleft, 1 + 12,
	      ar_block->header.size);
      to_oct ((long) real_s_totsize - real_s_sizeleft,
	      1 + 12, ar_block->header.offset);
      /* f_verbose is cleared around finish_header() and restored. */
      tmp = f_verbose;
      f_verbose = 0;
      finish_header (ar_block);
      f_verbose = tmp;
      if (f_volhdr)
	ar_block--;
    }

  err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
  if (err != blocksize)
    writeerror (err);
  else if (f_totals)
    tot_written += blocksize;


  bytes_written = blocksize;
  if (copy_back)
    {
      /* Restore ar_block and move the records that did not fit into
	 the block just written to the front of the buffer. */
      ar_block += copy_back;
      bcopy ((void *) (ar_block + blocking - copy_back),
	     (void *) ar_record,
	     copy_back * RECORDSIZE);
      ar_record += copy_back;

      if (real_s_sizeleft >= copy_back * RECORDSIZE)
	real_s_sizeleft -= copy_back * RECORDSIZE;
      else if ((real_s_sizeleft + RECORDSIZE - 1) / RECORDSIZE <= copy_back)
	real_s_name[0] = '\0';
      else
	{
#ifdef __MSDOS__
	  if (save_name[1] == ':')
	    save_name += 2;
#endif
	  while (*save_name == '/')
	    save_name++;

	  strcpy (real_s_name, save_name);
	  real_s_sizeleft = save_sizeleft;
	  real_s_totsize = save_totsize;
	}
      copy_back = 0;
    }
}
Write errors are always fatal */ +/* Hitting the end of a volume does not cause a write error unless the write +* was the first block of the volume */ + +void +writeerror (err) + int err; +{ + if (err < 0) + { + msg_perror ("can't write to %s", ar_files[cur_ar_file]); + exit (EX_BADARCH); + } + else + { + msg ("only wrote %u of %u bytes to %s", err, blocksize, ar_files[cur_ar_file]); + exit (EX_BADARCH); + } +} + +/* + * Handle read errors on the archive. + * + * If the read should be retried, readerror() returns to the caller. + */ +void +readerror () +{ +# define READ_ERROR_MAX 10 + + read_error_flag++; /* Tell callers */ + + msg_perror ("read error on %s", ar_files[cur_ar_file]); + + if (baserec == 0) + { + /* First block of tape. Probably stupidity error */ + exit (EX_BADARCH); + } + + /* + * Read error in mid archive. We retry up to READ_ERROR_MAX times + * and then give up on reading the archive. We set read_error_flag + * for our callers, so they can cope if they want. + */ + if (r_error_count++ > READ_ERROR_MAX) + { + msg ("Too many errors, quitting."); + exit (EX_BADARCH); + } + return; +} + + +/* + * Perform a read to flush the buffer. + */ +void +fl_read () +{ + int err; /* Result from system call */ + int left; /* Bytes left */ + char *more; /* Pointer to next byte to read */ + + if (f_checkpoint && !(++checkpoint % 10)) + msg ("Read checkpoint %d\n", checkpoint); + + /* + * Clear the count of errors. This only applies to a single + * call to fl_read. We leave read_error_flag alone; it is + * only turned off by higher level software. + */ + r_error_count = 0; /* Clear error count */ + + /* + * If we are about to wipe out a record that + * somebody needs to keep, copy it out to a holding + * area and adjust somebody's pointer to it. 
/*
 * Perform a read to flush the buffer.
 *
 * Reads the next block of the archive into ar_block.  On a full read
 * it simply returns.  For multi-volume archives, EOF / ENOSPC / a
 * short read (when not reblocking) switches to the next volume and
 * validates its volume header and continuation header.  Otherwise a
 * short read is either completed (f_reblock) or accepted when it ends
 * on a record boundary, in which case ar_last is pulled in.
 */
void
fl_read ()
{
  int err;			/* Result from system call */
  int left;			/* Bytes left */
  char *more;			/* Pointer to next byte to read */

  if (f_checkpoint && !(++checkpoint % 10))
    msg ("Read checkpoint %d\n", checkpoint);

  /*
   * Clear the count of errors.  This only applies to a single
   * call to fl_read.  We leave read_error_flag alone; it is
   * only turned off by higher level software.
   */
  r_error_count = 0;		/* Clear error count */

  /*
   * If we are about to wipe out a record that
   * somebody needs to keep, copy it out to a holding
   * area and adjust somebody's pointer to it.
   */
  if (save_rec &&
      *save_rec >= ar_record &&
      *save_rec < ar_last)
    {
      record_save_area = **save_rec;
      *save_rec = &record_save_area;
    }
  /* When updating an archive read from stdin, echo the block just
     consumed to descriptor 1 before it is overwritten. */
  if (write_archive_to_stdout && baserec != 0)
    {
      err = rmtwrite (1, ar_block->charptr, blocksize);
      if (err != blocksize)
	writeerror (err);
    }
  if (f_multivol)
    {
      /* Snapshot the current member's name and sizes so a
	 continuation header on the next volume can be checked. */
      if (save_name)
	{
	  if (save_name != real_s_name)
	    {
#ifdef __MSDOS__
	      if (save_name[1] == ':')
		save_name += 2;
#endif
	      while (*save_name == '/')
		save_name++;

	      strcpy (real_s_name, save_name);
	      save_name = real_s_name;
	    }
	  real_s_totsize = save_totsize;
	  real_s_sizeleft = save_sizeleft;

	}
      else
	{
	  real_s_name[0] = '\0';
	  real_s_totsize = 0;
	  real_s_sizeleft = 0;
	}
    }

error_loop:
  err = rmtread (archive, ar_block->charptr, (int) blocksize);
  if (err == blocksize)
    return;

  if ((err == 0 || (err < 0 && errno == ENOSPC) || (err > 0 && !f_reblock)) && f_multivol)
    {
      union record *head;

    try_volume:
      /* Modes that also write the archive open the next volume
	 read/write (type 2); the others read-only (type 1). */
      if (new_volume ((cmd_mode == CMD_APPEND || cmd_mode == CMD_CAT || cmd_mode == CMD_UPDATE) ? 2 : 1) < 0)
	return;
    vol_error:
      err = rmtread (archive, ar_block->charptr, (int) blocksize);
      if (err < 0)
	{
	  readerror ();
	  goto vol_error;
	}
      if (err != blocksize)
	goto short_read;

      head = ar_block;

      if (head->header.linkflag == LF_VOLHDR)
	{
	  if (f_volhdr)
	    {
#if 0
	      char *ptr;

	      ptr = (char *) malloc (strlen (f_volhdr) + 20);
	      sprintf (ptr, "%s Volume %d", f_volhdr, volno);
#endif
	      if (re_match (label_pattern, head->header.arch_name,
			    strlen (head->header.arch_name),
			    0, 0) < 0)
		{
		  msg ("Volume mismatch! %s!=%s", f_volhdr,
		       head->header.arch_name);
		  /* Wrong volume: undo the counters new_volume()
		     advanced and ask again. */
		  --volno;
		  --global_volno;
		  goto try_volume;
		}

#if 0
	      if (strcmp (ptr, head->header.name))
		{
		  msg ("Volume mismatch! %s!=%s", ptr, head->header.name);
		  --volno;
		  --global_volno;
		  free (ptr);
		  goto try_volume;
		}
	      free (ptr);
#endif
	    }
	  if (f_verbose)
	    fprintf (msg_file, "Reading %s\n", head->header.arch_name);
	  head++;
	}
      else if (f_volhdr)
	{
	  msg ("Warning: No volume header!");
	}

      if (real_s_name[0])
	{
	  long from_oct ();

	  /* A member was in progress: the next record must be its
	     continuation header, with matching name, size and offset. */
	  if (head->header.linkflag != LF_MULTIVOL || strcmp (head->header.arch_name, real_s_name))
	    {
	      msg ("%s is not continued on this volume!", real_s_name);
	      --volno;
	      --global_volno;
	      goto try_volume;
	    }
	  if (real_s_totsize != from_oct (1 + 12, head->header.size) + from_oct (1 + 12, head->header.offset))
	    {
	      msg ("%s is the wrong size (%ld!=%ld+%ld)",
		   head->header.arch_name, save_totsize,
		   from_oct (1 + 12, head->header.size),
		   from_oct (1 + 12, head->header.offset));
	      --volno;
	      --global_volno;
	      goto try_volume;
	    }
	  if (real_s_totsize - real_s_sizeleft != from_oct (1 + 12, head->header.offset))
	    {
	      msg ("This volume is out of sequence");
	      --volno;
	      --global_volno;
	      goto try_volume;
	    }
	  head++;
	}
      /* Skip past whatever headers were consumed above. */
      ar_record = head;
      return;
    }
  else if (err < 0)
    {
      readerror ();
      goto error_loop;		/* Try again */
    }

short_read:
  more = ar_block->charptr + err;
  left = blocksize - err;

again:
  if (0 == (((unsigned) left) % RECORDSIZE))
    {
      /* FIXME, for size=0, multi vol support */
      /* On the first block, warn about the problem */
      if (!f_reblock && baserec == 0 && f_verbose && err > 0)
	{
	  /*	msg("Blocksize = %d record%s",
				err / RECORDSIZE, (err > RECORDSIZE)? "s": "");*/
	  msg ("Blocksize = %d records", err / RECORDSIZE);
	}
      /* Only the records actually read are available. */
      ar_last = ar_block + ((unsigned) (blocksize - left)) / RECORDSIZE;
      return;
    }
  if (f_reblock)
    {
      /*
       * User warned us about this.  Fix up.
       */
      if (left > 0)
	{
	error2loop:
	  err = rmtread (archive, more, (int) left);
	  if (err < 0)
	    {
	      readerror ();
	      goto error2loop;	/* Try again */
	    }
	  if (err == 0)
	    {
	      msg ("archive %s EOF not on block boundary", ar_files[cur_ar_file]);
	      exit (EX_BADARCH);
	    }
	  left -= err;
	  more += err;
	  goto again;
	}
    }
  else
    {
      msg ("only read %d bytes from archive %s", err, ar_files[cur_ar_file]);
      exit (EX_BADARCH);
    }
}
+ */ + if (left > 0) + { + error2loop: + err = rmtread (archive, more, (int) left); + if (err < 0) + { + readerror (); + goto error2loop; /* Try again */ + } + if (err == 0) + { + msg ("archive %s EOF not on block boundary", ar_files[cur_ar_file]); + exit (EX_BADARCH); + } + left -= err; + more += err; + goto again; + } + } + else + { + msg ("only read %d bytes from archive %s", err, ar_files[cur_ar_file]); + exit (EX_BADARCH); + } +} + + +/* + * Flush the current buffer to/from the archive. + */ +void +flush_archive () +{ + int c; + + baserec += ar_last - ar_block;/* Keep track of block #s */ + ar_record = ar_block; /* Restore pointer to start */ + ar_last = ar_block + blocking;/* Restore pointer to end */ + + if (ar_reading) + { + if (time_to_start_writing) + { + time_to_start_writing = 0; + ar_reading = 0; + + if (file_to_switch_to >= 0) + { + if ((c = rmtclose (archive)) < 0) + msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, c); + + archive = file_to_switch_to; + } + else + (void) backspace_output (); + fl_write (); + } + else + fl_read (); + } + else + { + fl_write (); + } +} + +/* Backspace the archive descriptor by one blocks worth. + If its a tape, MTIOCTOP will work. If its something else, + we try to seek on it. If we can't seek, we lose! */ +int +backspace_output () +{ + long cur; + /* int er; */ + extern char *output_start; + +#ifdef MTIOCTOP + struct mtop t; + + t.mt_op = MTBSR; + t.mt_count = 1; + if ((rmtioctl (archive, MTIOCTOP, &t)) >= 0) + return 1; + if (errno == EIO && (rmtioctl (archive, MTIOCTOP, &t)) >= 0) + return 1; +#endif + + cur = rmtlseek (archive, 0L, 1); + cur -= blocksize; + /* Seek back to the beginning of this block and + start writing there. */ + + if (rmtlseek (archive, cur, 0) != cur) + { + /* Lseek failed. Try a different method */ + msg ("Couldn't backspace archive file. 
It may be unreadable without -i."); + /* Replace the first part of the block with nulls */ + if (ar_block->charptr != output_start) + bzero (ar_block->charptr, output_start - ar_block->charptr); + return 2; + } + return 3; +} + + +/* + * Close the archive file. + */ +void +close_archive () +{ + int child; + int status; + int c; + + if (time_to_start_writing || !ar_reading) + flush_archive (); + if (cmd_mode == CMD_DELETE) + { + off_t pos; + + pos = rmtlseek (archive, 0L, 1); +#ifndef __MSDOS__ + (void) ftruncate (archive, pos); +#else + (void) rmtwrite (archive, "", 0); +#endif + } + if (f_verify) + verify_volume (); + + if ((c = rmtclose (archive)) < 0) + msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, c); + +#ifndef __MSDOS__ + if (childpid) + { + /* + * Loop waiting for the right child to die, or for + * no more kids. + */ + while (((child = wait (&status)) != childpid) && child != -1) + ; + + if (child != -1) + { + if (WIFSIGNALED (status)) + { + /* SIGPIPE is OK, everything else is a problem. */ + if (WTERMSIG (status) != SIGPIPE) + msg ("child died with signal %d%s", WTERMSIG (status), + WIFCOREDUMPED (status) ? " (core dumped)" : ""); + } + else + { + /* Child voluntarily terminated -- but why? */ + if (WEXITSTATUS (status) == MAGIC_STAT) + { + exit (EX_SYSTEM); /* Child had trouble */ + } + if (WEXITSTATUS (status) == (SIGPIPE + 128)) + { + /* + * /bin/sh returns this if its child + * dies with SIGPIPE. 'Sok. + */ + /* Do nothing. */ + } + else if (WEXITSTATUS (status)) + msg ("child returned status %d", + WEXITSTATUS (status)); + } + } + } +#endif /* __MSDOS__ */ +} + + +#ifdef DONTDEF +/* + * Message management. + * + * anno writes a message prefix on stream (eg stdout, stderr). + * + * The specified prefix is normally output followed by a colon and a space. + * However, if other command line options are set, more output can come + * out, such as the record # within the archive. 
+ * + * If the specified prefix is NULL, no output is produced unless the + * command line option(s) are set. + * + * If the third argument is 1, the "saved" record # is used; if 0, the + * "current" record # is used. + */ +void +anno (stream, prefix, savedp) + FILE *stream; + char *prefix; + int savedp; +{ +# define MAXANNO 50 + char buffer[MAXANNO]; /* Holds annorecment */ +# define ANNOWIDTH 13 + int space; + long offset; + int save_e; + + save_e = errno; + /* Make sure previous output gets out in sequence */ + if (stream == stderr) + fflush (stdout); + if (f_sayblock) + { + if (prefix) + { + fputs (prefix, stream); + putc (' ', stream); + } + offset = ar_record - ar_block; + (void) sprintf (buffer, "rec %d: ", + savedp ? saved_recno : + baserec + offset); + fputs (buffer, stream); + space = ANNOWIDTH - strlen (buffer); + if (space > 0) + { + fprintf (stream, "%*s", space, ""); + } + } + else if (prefix) + { + fputs (prefix, stream); + fputs (": ", stream); + } + errno = save_e; +} + +#endif + +/* Called to initialize the global volume number. */ +void +init_volume_number () +{ + FILE *vf; + + vf = fopen (f_volno_file, "r"); + if (!vf && errno != ENOENT) + msg_perror ("%s", f_volno_file); + + if (vf) + { + fscanf (vf, "%d", &global_volno); + fclose (vf); + } +} + +/* Called to write out the closing global volume number. */ +void +closeout_volume_number () +{ + FILE *vf; + + vf = fopen (f_volno_file, "w"); + if (!vf) + msg_perror ("%s", f_volno_file); + else + { + fprintf (vf, "%d\n", global_volno); + fclose (vf); + } +} + +/* We've hit the end of the old volume. Close it and open the next one */ +/* Values for type: 0: writing 1: reading 2: updating */ +int +new_volume (type) + int type; +{ + int c; + char inbuf[80]; + char *p; + static FILE *read_file = 0; + extern int now_verifying; + extern char TTY_NAME[]; + static int looped = 0; + + if (!read_file && !f_run_script_at_end) + read_file = (archive == 0) ? 
fopen (TTY_NAME, "r") : stdin; + + if (now_verifying) + return -1; + if (f_verify) + verify_volume (); + if ((c = rmtclose (archive)) < 0) + msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, c); + + global_volno++; + volno++; + cur_ar_file++; + if (cur_ar_file == n_ar_files) + { + cur_ar_file = 0; + looped = 1; + } + +tryagain: + if (looped) + { + /* We have to prompt from now on. */ + if (f_run_script_at_end) + { + closeout_volume_number (); + system (info_script); + } + else + for (;;) + { + fprintf (msg_file, "\007Prepare volume #%d for %s and hit return: ", global_volno, ar_files[cur_ar_file]); + fflush (msg_file); + if (fgets (inbuf, sizeof (inbuf), read_file) == 0) + { + fprintf (msg_file, "EOF? What does that mean?"); + if (cmd_mode != CMD_EXTRACT && cmd_mode != CMD_LIST && cmd_mode != CMD_DIFF) + msg ("Warning: Archive is INCOMPLETE!"); + exit (EX_BADARCH); + } + if (inbuf[0] == '\n' || inbuf[0] == 'y' || inbuf[0] == 'Y') + break; + + switch (inbuf[0]) + { + case '?': + { + fprintf (msg_file, "\ + n [name] Give a new filename for the next (and subsequent) volume(s)\n\ + q Abort tar\n\ + ! Spawn a subshell\n\ + ? Print this list\n"); + } + break; + + case 'q': /* Quit */ + fprintf (msg_file, "No new volume; exiting.\n"); + if (cmd_mode != CMD_EXTRACT && cmd_mode != CMD_LIST && cmd_mode != CMD_DIFF) + msg ("Warning: Archive is INCOMPLETE!"); + exit (EX_BADARCH); + + case 'n': /* Get new file name */ + { + char *q, *r; + static char *old_name; + + for (q = &inbuf[1]; *q == ' ' || *q == '\t'; q++) + ; + for (r = q; *r; r++) + if (*r == '\n') + *r = '\0'; + old_name = p = (char *) malloc ((unsigned) (strlen (q) + 2)); + if (p == 0) + { + msg ("Can't allocate memory for name"); + exit (EX_SYSTEM); + } + (void) strcpy (p, q); + ar_files[cur_ar_file] = p; + } + break; + + case '!': +#ifdef __MSDOS__ + spawnl (P_WAIT, getenv ("COMSPEC"), "-", 0); +#else + /* JF this needs work! 
*/ + switch (fork ()) + { + case -1: + msg_perror ("can't fork!"); + break; + case 0: + p = getenv ("SHELL"); + if (p == 0) + p = "/bin/sh"; + execlp (p, "-sh", "-i", 0); + msg_perror ("can't exec a shell %s", p); + _exit (55); + default: + wait (0); + break; + } +#endif + break; + } + } + } + + + if (type == 2 || f_verify) + archive = rmtopen (ar_files[cur_ar_file], O_RDWR | O_CREAT, 0666); + else if (type == 1) + archive = rmtopen (ar_files[cur_ar_file], O_RDONLY, 0666); + else if (type == 0) + archive = rmtcreat (ar_files[cur_ar_file], 0666); + else + archive = -1; + + if (archive < 0) + { + msg_perror ("can't open %s", ar_files[cur_ar_file]); + goto tryagain; + } +#ifdef __MSDOS__ + setmode (archive, O_BINARY); +#endif + return 0; +} + +/* this is a useless function that takes a buffer returned by wantbytes + and does nothing with it. If the function called by wantbytes returns + an error indicator (non-zero), this function is called for the rest of + the file. + */ +int +no_op (size, data) + int size; + char *data; +{ + return 0; +} + +/* Some other routine wants SIZE bytes in the archive. For each chunk of + the archive, call FUNC with the size of the chunk, and the address of + the chunk it can work with. + */ +int +wantbytes (size, func) + long size; + int (*func) (); +{ + char *data; + long data_size; + + while (size) + { + data = findrec ()->charptr; + if (data == NULL) + { /* Check it... */ + msg ("Unexpected EOF on archive file"); + return -1; + } + data_size = endofrecs ()->charptr - data; + if (data_size > size) + data_size = size; + if ((*func) (data_size, data)) + func = no_op; + userec ((union record *) (data + data_size - 1)); + size -= data_size; + } + return 0; +} diff --git a/gnu/usr.bin/tar/create.c b/gnu/usr.bin/tar/create.c new file mode 100644 index 0000000..62b9c51 --- /dev/null +++ b/gnu/usr.bin/tar/create.c @@ -0,0 +1,1454 @@ +/* Create a tar archive. 
+ Copyright (C) 1985, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Create a tar archive. + * + * Written 25 Aug 1985 by John Gilmore, ihnp4!hoptoad!gnu. + */ + +#ifdef _AIX + #pragma alloca +#endif +#include <sys/types.h> +#include <stdio.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#include "tar.h" +#include "port.h" + +#ifndef __MSDOS__ +#include <pwd.h> +#include <grp.h> +#endif + +#if defined (_POSIX_VERSION) +#include <utime.h> +#else +struct utimbuf +{ + long actime; + long modtime; +}; + +#endif + +extern struct stat hstat; /* Stat struct corresponding */ + +#ifndef __MSDOS__ +extern dev_t ar_dev; +extern ino_t ar_ino; +#endif + +/* JF */ +extern struct name *gnu_list_name; + +/* + * If there are no symbolic links, there is no lstat(). Use stat(). 
+ */ +#ifndef S_ISLNK +#define lstat stat +#endif + +extern void print_header (); + +union record *start_header (); +void blank_name_list (); +int check_exclude (); +PTR ck_malloc (); +PTR ck_realloc (); +void clear_buffer (); +void close_archive (); +void collect_and_sort_names (); +int confirm (); +int deal_with_sparse (); +void find_new_file_size (); +void finish_header (); +int finish_sparse_file (); +void finduname (); +void findgname (); +int is_dot_or_dotdot (); +void open_archive (); +char *name_next (); +void name_close (); +void to_oct (); +void dump_file (); +void write_dir_file (); +void write_eot (); +void write_long (); +int zero_record (); + +/* This code moved from tar.h since create.c is the only file that cares + about 'struct link's. This means that other files might not have to + include sys/types.h any more. */ + +struct link + { + struct link *next; + dev_t dev; + ino_t ino; + short linkcount; + char name[1]; + }; + +struct link *linklist; /* Points to first link in list */ + +static nolinks; /* Gets set if we run out of RAM */ + +/* + * "Scratch" space to store the information about a sparse file before + * writing the info into the header or extended header + */ +/* struct sp_array *sparsearray;*/ + +/* number of elts storable in the sparsearray */ +/*int sparse_array_size = 10;*/ + +void +create_archive () +{ + register char *p; + char *name_from_list (); + + open_archive (0); /* Open for writing */ + + if (f_gnudump) + { + char *buf = ck_malloc (PATH_MAX); + char *q, *bufp; + + collect_and_sort_names (); + + while (p = name_from_list ()) + dump_file (p, -1, 1); + /* if(!f_dironly) { */ + blank_name_list (); + while (p = name_from_list ()) + { + strcpy (buf, p); + if (p[strlen (p) - 1] != '/') + strcat (buf, "/"); + bufp = buf + strlen (buf); + for (q = gnu_list_name->dir_contents; q && *q; q += strlen (q) + 1) + { + if (*q == 'Y') + { + strcpy (bufp, q + 1); + dump_file (buf, -1, 1); + } + } + } + /* } */ + free (buf); + } + else + { + 
while (p = name_next (1)) + dump_file (p, -1, 1); + } + + write_eot (); + close_archive (); + if (f_gnudump) + write_dir_file (); + name_close (); +} + +/* + * Dump a single file. If it's a directory, recurse. + * Result is 1 for success, 0 for failure. + * Sets global "hstat" to stat() output for this file. + */ +void +dump_file (p, curdev, toplevel) + char *p; /* File name to dump */ + int curdev; /* Device our parent dir was on */ + int toplevel; /* Whether we are a toplevel call */ +{ + union record *header; + char type; + extern char *save_name; /* JF for multi-volume support */ + extern long save_totsize; + extern long save_sizeleft; + union record *exhdr; + char save_linkflag; + extern time_t new_time; + int critical_error = 0; + struct utimbuf restore_times; + /* int sparse_ind = 0;*/ + + + if (f_confirm && !confirm ("add", p)) + return; + + /* + * Use stat if following (rather than dumping) 4.2BSD's + * symbolic links. Otherwise, use lstat (which, on non-4.2 + * systems, is #define'd to stat anyway. + */ +#ifdef STX_HIDDEN /* AIX */ + if (0 != f_follow_links ? + statx (p, &hstat, STATSIZE, STX_HIDDEN) : + statx (p, &hstat, STATSIZE, STX_HIDDEN | STX_LINK)) +#else + if (0 != f_follow_links ? stat (p, &hstat) : lstat (p, &hstat)) +#endif + { + badperror: + msg_perror ("can't add file %s", p); + badfile: + if (!f_ignore_failed_read || critical_error) + errors++; + return; + } + + restore_times.actime = hstat.st_atime; + restore_times.modtime = hstat.st_mtime; + +#ifdef S_ISHIDDEN + if (S_ISHIDDEN (hstat.st_mode)) + { + char *new = (char *) alloca (strlen (p) + 2); + if (new) + { + strcpy (new, p); + strcat (new, "@"); + p = new; + } + } +#endif + + /* See if we only want new files, and check if this one is too old to + put in the archive. 
*/ + if (f_new_files + && !f_gnudump + && new_time > hstat.st_mtime + && !S_ISDIR (hstat.st_mode) + && (f_new_files > 1 || new_time > hstat.st_ctime)) + { + if (curdev == -1) + { + msg ("%s: is unchanged; not dumped", p); + } + return; + } + +#ifndef __MSDOS__ + /* See if we are trying to dump the archive */ + if (ar_dev && hstat.st_dev == ar_dev && hstat.st_ino == ar_ino) + { + msg ("%s is the archive; not dumped", p); + return; + } +#endif + /* + * Check for multiple links. + * + * We maintain a list of all such files that we've written so + * far. Any time we see another, we check the list and + * avoid dumping the data again if we've done it once already. + */ + if (hstat.st_nlink > 1 + && (S_ISREG (hstat.st_mode) +#ifdef S_ISCTG + || S_ISCTG (hstat.st_mode) +#endif +#ifdef S_ISCHR + || S_ISCHR (hstat.st_mode) +#endif +#ifdef S_ISBLK + || S_ISBLK (hstat.st_mode) +#endif +#ifdef S_ISFIFO + || S_ISFIFO (hstat.st_mode) +#endif + )) + { + register struct link *lp; + + /* First quick and dirty. Hashing, etc later FIXME */ + for (lp = linklist; lp; lp = lp->next) + { + if (lp->ino == hstat.st_ino && + lp->dev == hstat.st_dev) + { + char *link_name = lp->name; + + /* We found a link. */ + while (!f_absolute_paths && *link_name == '/') + { + static int link_warn = 0; + + if (!link_warn) + { + msg ("Removing leading / from absolute links"); + link_warn++; + } + link_name++; + } + if (link_name - lp->name >= NAMSIZ) + write_long (link_name, LF_LONGLINK); + current_link_name = link_name; + + hstat.st_size = 0; + header = start_header (p, &hstat); + if (header == NULL) + { + critical_error = 1; + goto badfile; + } + strncpy (header->header.arch_linkname, + link_name, NAMSIZ); + + /* Force null truncated */ + header->header.arch_linkname[NAMSIZ - 1] = 0; + + header->header.linkflag = LF_LINK; + finish_header (header); + /* FIXME: Maybe remove from list after all links found? 
*/ + if (f_remove_files) + { + if (unlink (p) == -1) + msg_perror ("cannot remove %s", p); + } + return; /* We dumped it */ + } + } + + /* Not found. Add it to the list of possible links. */ + lp = (struct link *) ck_malloc ((unsigned) (sizeof (struct link) + strlen (p))); + if (!lp) + { + if (!nolinks) + { + msg ( + "no memory for links, they will be dumped as separate files"); + nolinks++; + } + } + lp->ino = hstat.st_ino; + lp->dev = hstat.st_dev; + strcpy (lp->name, p); + lp->next = linklist; + linklist = lp; + } + + /* + * This is not a link to a previously dumped file, so dump it. + */ + if (S_ISREG (hstat.st_mode) +#ifdef S_ISCTG + || S_ISCTG (hstat.st_mode) +#endif + ) + { + int f; /* File descriptor */ + long bufsize, count; + long sizeleft; + register union record *start; + int header_moved; + char isextended = 0; + int upperbound; + /* int end_nulls = 0; */ + + header_moved = 0; + +#ifdef BSD42 + if (f_sparse_files) + { + /* + * JK - This is the test for sparseness: whether the + * "size" of the file matches the number of blocks + * allocated for it. If there is a smaller number + * of blocks that would be necessary to accommodate + * a file of this size, we have a sparse file, i.e., + * at least one of those records in the file is just + * a useless hole. + */ +#ifdef hpux /* Nice of HPUX to gratuitiously change it, huh? - mib */ + if (hstat.st_size - (hstat.st_blocks * 1024) > 1024) +#else + if (hstat.st_size - (hstat.st_blocks * RECORDSIZE) > RECORDSIZE) +#endif + { + int filesize = hstat.st_size; + register int i; + + header = start_header (p, &hstat); + if (header == NULL) + { + critical_error = 1; + goto badfile; + } + header->header.linkflag = LF_SPARSE; + header_moved++; + + /* + * Call the routine that figures out the + * layout of the sparse file in question. + * UPPERBOUND is the index of the last + * element of the "sparsearray," i.e., + * the number of elements it needed to + * describe the file. 
+ */ + + upperbound = deal_with_sparse (p, header); + + /* + * See if we'll need an extended header + * later + */ + if (upperbound > SPARSE_IN_HDR - 1) + header->header.isextended++; + /* + * We store the "real" file size so + * we can show that in case someone wants + * to list the archive, i.e., tar tvf <file>. + * It might be kind of disconcerting if the + * shrunken file size was the one that showed + * up. + */ + to_oct ((long) hstat.st_size, 1 + 12, + header->header.realsize); + + /* + * This will be the new "size" of the + * file, i.e., the size of the file + * minus the records of holes that we're + * skipping over. + */ + + find_new_file_size (&filesize, upperbound); + hstat.st_size = filesize; + to_oct ((long) filesize, 1 + 12, + header->header.size); + /* to_oct((long) end_nulls, 1+12, + header->header.ending_blanks);*/ + + for (i = 0; i < SPARSE_IN_HDR; i++) + { + if (!sparsearray[i].numbytes) + break; + to_oct (sparsearray[i].offset, 1 + 12, + header->header.sp[i].offset); + to_oct (sparsearray[i].numbytes, 1 + 12, + header->header.sp[i].numbytes); + } + + } + } +#else + upperbound = SPARSE_IN_HDR - 1; +#endif + + sizeleft = hstat.st_size; + /* Don't bother opening empty, world readable files. 
*/ + if (sizeleft > 0 || 0444 != (0444 & hstat.st_mode)) + { + f = open (p, O_RDONLY | O_BINARY); + if (f < 0) + goto badperror; + } + else + { + f = -1; + } + + /* If the file is sparse, we've already taken care of this */ + if (!header_moved) + { + header = start_header (p, &hstat); + if (header == NULL) + { + if (f >= 0) + (void) close (f); + critical_error = 1; + goto badfile; + } + } +#ifdef S_ISCTG + /* Mark contiguous files, if we support them */ + if (f_standard && S_ISCTG (hstat.st_mode)) + { + header->header.linkflag = LF_CONTIG; + } +#endif + isextended = header->header.isextended; + save_linkflag = header->header.linkflag; + finish_header (header); + if (isextended) + { + /* int sum = 0;*/ + register int i; + /* register union record *exhdr;*/ + /* int arraybound = SPARSE_EXT_HDR;*/ + /* static */ int index_offset = SPARSE_IN_HDR; + + extend:exhdr = findrec (); + + if (exhdr == NULL) + { + critical_error = 1; + goto badfile; + } + bzero (exhdr->charptr, RECORDSIZE); + for (i = 0; i < SPARSE_EXT_HDR; i++) + { + if (i + index_offset > upperbound) + break; + to_oct ((long) sparsearray[i + index_offset].numbytes, + 1 + 12, + exhdr->ext_hdr.sp[i].numbytes); + to_oct ((long) sparsearray[i + index_offset].offset, + 1 + 12, + exhdr->ext_hdr.sp[i].offset); + } + userec (exhdr); + /* sum += i; + if (sum < upperbound) + goto extend;*/ + if (index_offset + i <= upperbound) + { + index_offset += i; + exhdr->ext_hdr.isextended++; + goto extend; + } + + } + if (save_linkflag == LF_SPARSE) + { + if (finish_sparse_file (f, &sizeleft, hstat.st_size, p)) + goto padit; + } + else + while (sizeleft > 0) + { + + if (f_multivol) + { + save_name = p; + save_sizeleft = sizeleft; + save_totsize = hstat.st_size; + } + start = findrec (); + + bufsize = endofrecs ()->charptr - start->charptr; + + if (sizeleft < bufsize) + { + /* Last read -- zero out area beyond */ + bufsize = (int) sizeleft; + count = bufsize % RECORDSIZE; + if (count) + bzero (start->charptr + sizeleft, + (int) 
(RECORDSIZE - count)); + } + count = read (f, start->charptr, bufsize); + if (count < 0) + { + msg_perror ("read error at byte %ld, reading\ + %d bytes, in file %s", hstat.st_size - sizeleft, bufsize, p); + goto padit; + } + sizeleft -= count; + + /* This is nonportable (the type of userec's arg). */ + userec (start + (count - 1) / RECORDSIZE); + + if (count == bufsize) + continue; + msg ("file %s shrunk by %d bytes, padding with zeros.", p, sizeleft); + goto padit; /* Short read */ + } + + if (f_multivol) + save_name = 0; + + if (f >= 0) + (void) close (f); + + if (f_remove_files) + { + if (unlink (p) == -1) + msg_perror ("cannot remove %s", p); + } + if (f_atime_preserve) + utime (p, &restore_times); + return; + + /* + * File shrunk or gave error, pad out tape to match + * the size we specified in the header. + */ + padit: + while (sizeleft > 0) + { + save_sizeleft = sizeleft; + start = findrec (); + bzero (start->charptr, RECORDSIZE); + userec (start); + sizeleft -= RECORDSIZE; + } + if (f_multivol) + save_name = 0; + if (f >= 0) + (void) close (f); + if (f_atime_preserve) + utime (p, &restore_times); + return; + } + +#ifdef S_ISLNK + else if (S_ISLNK (hstat.st_mode)) + { + int size; + char *buf = alloca (PATH_MAX + 1); + + size = readlink (p, buf, PATH_MAX + 1); + if (size < 0) + goto badperror; + buf[size] = '\0'; + if (size >= NAMSIZ) + write_long (buf, LF_LONGLINK); + current_link_name = buf; + + hstat.st_size = 0; /* Force 0 size on symlink */ + header = start_header (p, &hstat); + if (header == NULL) + { + critical_error = 1; + goto badfile; + } + strncpy (header->header.arch_linkname, buf, NAMSIZ); + header->header.arch_linkname[NAMSIZ - 1] = '\0'; + header->header.linkflag = LF_SYMLINK; + finish_header (header); /* Nothing more to do to it */ + if (f_remove_files) + { + if (unlink (p) == -1) + msg_perror ("cannot remove %s", p); + } + return; + } +#endif + + else if (S_ISDIR (hstat.st_mode)) + { + register DIR *dirp; + register struct dirent *d; + char 
*namebuf; + int buflen; + register int len; + int our_device = hstat.st_dev; + + /* Build new prototype name */ + len = strlen (p); + buflen = len + NAMSIZ; + namebuf = ck_malloc (buflen + 1); + strncpy (namebuf, p, buflen); + while (len >= 1 && '/' == namebuf[len - 1]) + len--; /* Delete trailing slashes */ + namebuf[len++] = '/'; /* Now add exactly one back */ + namebuf[len] = '\0'; /* Make sure null-terminated */ + + /* + * Output directory header record with permissions + * FIXME, do this AFTER files, to avoid R/O dir problems? + * If old archive format, don't write record at all. + */ + if (!f_oldarch) + { + hstat.st_size = 0; /* Force 0 size on dir */ + /* + * If people could really read standard archives, + * this should be: (FIXME) + header = start_header(f_standard? p: namebuf, &hstat); + * but since they'd interpret LF_DIR records as + * regular files, we'd better put the / on the name. + */ + header = start_header (namebuf, &hstat); + if (header == NULL) + { + critical_error = 1; + goto badfile; /* eg name too long */ + } + + if (f_gnudump) + header->header.linkflag = LF_DUMPDIR; + else if (f_standard) + header->header.linkflag = LF_DIR; + + /* If we're gnudumping, we aren't done yet so don't close it. 
*/ + if (!f_gnudump) + finish_header (header); /* Done with directory header */ + } + + if (f_gnudump) + { + int sizeleft; + int totsize; + int bufsize; + union record *start; + int count; + char *buf, *p_buf; + + buf = gnu_list_name->dir_contents; /* FOO */ + totsize = 0; + for (p_buf = buf; p_buf && *p_buf;) + { + int tmp; + + tmp = strlen (p_buf) + 1; + totsize += tmp; + p_buf += tmp; + } + totsize++; + to_oct ((long) totsize, 1 + 12, header->header.size); + finish_header (header); + p_buf = buf; + sizeleft = totsize; + while (sizeleft > 0) + { + if (f_multivol) + { + save_name = p; + save_sizeleft = sizeleft; + save_totsize = totsize; + } + start = findrec (); + bufsize = endofrecs ()->charptr - start->charptr; + if (sizeleft < bufsize) + { + bufsize = sizeleft; + count = bufsize % RECORDSIZE; + if (count) + bzero (start->charptr + sizeleft, RECORDSIZE - count); + } + bcopy (p_buf, start->charptr, bufsize); + sizeleft -= bufsize; + p_buf += bufsize; + userec (start + (bufsize - 1) / RECORDSIZE); + } + if (f_multivol) + save_name = 0; + if (f_atime_preserve) + utime (p, &restore_times); + return; + } + + /* Now output all the files in the directory */ +#if 0 + if (f_dironly) + return; /* Unless the cmdline said not to */ +#endif + /* + * See if we are crossing from one file system to another, + * and avoid doing so if the user only wants to dump one file system. + */ + if (f_local_filesys && !toplevel && curdev != hstat.st_dev) + { + if (f_verbose) + msg ("%s: is on a different filesystem; not dumped", p); + return; + } + + + errno = 0; + dirp = opendir (p); + if (!dirp) + { + if (errno) + { + msg_perror ("can't open directory %s", p); + } + else + { + msg ("error opening directory %s", + p); + } + return; + } + + /* Hack to remove "./" from the front of all the file names */ + if (len == 2 && namebuf[0] == '.' && namebuf[1] == '/') + len = 0; + + /* Should speed this up by cd-ing into the dir, FIXME */ + while (NULL != (d = readdir (dirp))) + { + /* Skip . 
and .. */ + if (is_dot_or_dotdot (d->d_name)) + continue; + + if (NLENGTH (d) + len >= buflen) + { + buflen = len + NLENGTH (d); + namebuf = ck_realloc (namebuf, buflen + 1); + /* namebuf[len]='\0'; + msg("file name %s%s too long", + namebuf, d->d_name); + continue; */ + } + strcpy (namebuf + len, d->d_name); + if (f_exclude && check_exclude (namebuf)) + continue; + dump_file (namebuf, our_device, 0); + } + + closedir (dirp); + free (namebuf); + if (f_atime_preserve) + utime (p, &restore_times); + return; + } + +#ifdef S_ISCHR + else if (S_ISCHR (hstat.st_mode)) + { + type = LF_CHR; + } +#endif + +#ifdef S_ISBLK + else if (S_ISBLK (hstat.st_mode)) + { + type = LF_BLK; + } +#endif + + /* Avoid screwy apollo lossage where S_IFIFO == S_IFSOCK */ +#if (_ISP__M68K == 0) && (_ISP__A88K == 0) && defined(S_ISFIFO) + else if (S_ISFIFO (hstat.st_mode)) + { + type = LF_FIFO; + } +#endif + +#ifdef S_ISSOCK + else if (S_ISSOCK (hstat.st_mode)) + { + type = LF_FIFO; + } +#endif + else + goto unknown; + + if (!f_standard) + goto unknown; + + hstat.st_size = 0; /* Force 0 size */ + header = start_header (p, &hstat); + if (header == NULL) + { + critical_error = 1; + goto badfile; /* eg name too long */ + } + + header->header.linkflag = type; +#if defined(S_IFBLK) || defined(S_IFCHR) + if (type != LF_FIFO) + { + to_oct ((long) major (hstat.st_rdev), 8, + header->header.devmajor); + to_oct ((long) minor (hstat.st_rdev), 8, + header->header.devminor); + } +#endif + + finish_header (header); + if (f_remove_files) + { + if (unlink (p) == -1) + msg_perror ("cannot remove %s", p); + } + return; + +unknown: + msg ("%s: Unknown file type; file ignored.", p); +} + +int +finish_sparse_file (fd, sizeleft, fullsize, name) + int fd; + long *sizeleft, fullsize; + char *name; +{ + union record *start; + char tempbuf[RECORDSIZE]; + int bufsize, sparse_ind = 0, count; + long pos; + long nwritten = 0; + + + while (*sizeleft > 0) + { + start = findrec (); + bzero (start->charptr, RECORDSIZE); + 
bufsize = sparsearray[sparse_ind].numbytes; + if (!bufsize) + { /* we blew it, maybe */ + msg ("Wrote %ld of %ld bytes to file %s", + fullsize - *sizeleft, fullsize, name); + break; + } + pos = lseek (fd, sparsearray[sparse_ind++].offset, 0); + /* + * If the number of bytes to be written here exceeds + * the size of the temporary buffer, do it in steps. + */ + while (bufsize > RECORDSIZE) + { + /* if (amt_read) { + count = read(fd, start->charptr+amt_read, RECORDSIZE-amt_read); + bufsize -= RECORDSIZE - amt_read; + amt_read = 0; + userec(start); + start = findrec(); + bzero(start->charptr, RECORDSIZE); + }*/ + /* store the data */ + count = read (fd, start->charptr, RECORDSIZE); + if (count < 0) + { + msg_perror ("read error at byte %ld, reading %d bytes, in file %s", + fullsize - *sizeleft, bufsize, name); + return 1; + } + bufsize -= count; + *sizeleft -= count; + userec (start); + nwritten += RECORDSIZE; /* XXX */ + start = findrec (); + bzero (start->charptr, RECORDSIZE); + } + + + clear_buffer (tempbuf); + count = read (fd, tempbuf, bufsize); + bcopy (tempbuf, start->charptr, RECORDSIZE); + if (count < 0) + { + msg_perror ("read error at byte %ld, reading %d bytes, in file %s", + fullsize - *sizeleft, bufsize, name); + return 1; + } + /* if (amt_read >= RECORDSIZE) { + amt_read = 0; + userec(start+(count-1)/RECORDSIZE); + if (count != bufsize) { + msg("file %s shrunk by %d bytes, padding with zeros.", name, sizeleft); + return 1; + } + start = findrec(); + } else + amt_read += bufsize;*/ + nwritten += count; /* XXX */ + *sizeleft -= count; + userec (start); + + } + free (sparsearray); + /* printf ("Amount actually written is (I hope) %d.\n", nwritten); */ + /* userec(start+(count-1)/RECORDSIZE);*/ + return 0; + +} + +void +init_sparsearray () +{ + register int i; + + sp_array_size = 10; + /* + * Make room for our scratch space -- initially is 10 elts long + */ + sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array)); + for (i = 
0; i < sp_array_size; i++) + { + sparsearray[i].offset = 0; + sparsearray[i].numbytes = 0; + } +} + + + +/* + * Okay, we've got a sparse file on our hands -- now, what we need to do is + * make a pass through the file and carefully note where any data is, i.e., + * we want to find how far into the file each instance of data is, and how + * many bytes are there. We store this information in the sparsearray, + * which will later be translated into header information. For now, we use + * the sparsearray as convenient storage. + * + * As a side note, this routine is a mess. If I could have found a cleaner + * way to do it, I would have. If anyone wants to find a nicer way to do + * this, feel free. + */ + +/* There is little point in trimming small amounts of null data at the */ +/* head and tail of blocks -- it's ok if we only avoid dumping blocks */ +/* of complete null data */ +int +deal_with_sparse (name, header, nulls_at_end) + char *name; + union record *header; + int nulls_at_end; +{ + long numbytes = 0; + long offset = 0; + /* long save_offset;*/ + int fd; + /* int current_size = hstat.st_size;*/ + int sparse_ind = 0, cc; + char buf[RECORDSIZE]; +#if 0 + int read_last_data = 0; /* did we just read the last record? */ +#endif + int amidst_data = 0; + + header->header.isextended = 0; + /* + * Can't open the file -- this problem will be caught later on, + * so just return. 
+ */ + if ((fd = open (name, O_RDONLY)) < 0) + return 0; + + init_sparsearray (); + clear_buffer (buf); + + while ((cc = read (fd, buf, sizeof buf)) != 0) + { + + if (sparse_ind > sp_array_size - 1) + { + + /* + * realloc the scratch area, since we've run out of room -- + */ + sparsearray = (struct sp_array *) + ck_realloc (sparsearray, + 2 * sp_array_size * (sizeof (struct sp_array))); + sp_array_size *= 2; + } + if (cc == sizeof buf) + { + if (zero_record (buf)) + { + if (amidst_data) + { + sparsearray[sparse_ind++].numbytes + = numbytes; + amidst_data = 0; + } + } + else + { /* !zero_record(buf) */ + if (amidst_data) + numbytes += cc; + else + { + amidst_data = 1; + numbytes = cc; + sparsearray[sparse_ind].offset + = offset; + } + } + } + else if (cc < sizeof buf) + { + /* This has to be the last bit of the file, so this */ + /* is somewhat shorter than the above. */ + if (!zero_record (buf)) + { + if (!amidst_data) + { + amidst_data = 1; + numbytes = cc; + sparsearray[sparse_ind].offset + = offset; + } + else + numbytes += cc; + } + } + offset += cc; + clear_buffer (buf); + } + if (amidst_data) + sparsearray[sparse_ind++].numbytes = numbytes; + else + { + sparsearray[sparse_ind].offset = offset-1; + sparsearray[sparse_ind++].numbytes = 1; + } + close (fd); + + return sparse_ind - 1; +} + +/* + * Just zeroes out the buffer so we don't confuse ourselves with leftover + * data. + */ +void +clear_buffer (buf) + char *buf; +{ + register int i; + + for (i = 0; i < RECORDSIZE; i++) + buf[i] = '\0'; +} + +#if 0 /* I'm leaving this as a monument to Joy Kendall, who wrote it -mib */ +/* + * JK - + * This routine takes a character array, and tells where within that array + * the data can be found. It skips over any zeros, and sets the first + * non-zero point in the array to be the "start", and continues until it + * finds non-data again, which is marked as the "end." 
This routine is + * mainly for 1) seeing how far into a file we must lseek to data, given + * that we have a sparse file, and 2) determining the "real size" of the + * file, i.e., the number of bytes in the sparse file that are data, as + * opposed to the zeros we are trying to skip. + */ +where_is_data (from, to, buffer) + int *from, *to; + char *buffer; +{ + register int i = 0; + register int save_to = *to; + int amidst_data = 0; + + + while (!buffer[i]) + i++; + *from = i; + + if (*from < 16) /* don't bother */ + *from = 0; + /* keep going to make sure there isn't more real + data in this record */ + while (i < RECORDSIZE) + { + if (!buffer[i]) + { + if (amidst_data) + { + save_to = i; + amidst_data = 0; + } + i++; + } + else if (buffer[i]) + { + if (!amidst_data) + amidst_data = 1; + i++; + } + } + if (i == RECORDSIZE) + *to = i; + else + *to = save_to; + +} + +#endif + +/* Note that this routine is only called if zero_record returned true */ +#if 0 /* But we actually don't need it at all. */ +where_is_data (from, to, buffer) + int *from, *to; + char *buffer; +{ + char *fp, *tp; + + for (fp = buffer; !*fp; fp++) + ; + for (tp = buffer + RECORDSIZE - 1; !*tp; tp--) + ; + *from = fp - buffer; + *to = tp - buffer + 1; +} + +#endif + + + +/* + * Takes a recordful of data and basically cruises through it to see if + * it's made *entirely* of zeros, returning a 0 the instant it finds + * something that is a non-zero, i.e., useful data. + */ +int +zero_record (buffer) + char *buffer; +{ + register int i; + + for (i = 0; i < RECORDSIZE; i++) + if (buffer[i] != '\000') + return 0; + return 1; +} + +void +find_new_file_size (filesize, highest_index) + int *filesize; + int highest_index; +{ + register int i; + + *filesize = 0; + for (i = 0; sparsearray[i].numbytes && i <= highest_index; i++) + *filesize += sparsearray[i].numbytes; +} + +/* + * Make a header block for the file name whose stat info is st . + * Return header pointer for success, NULL if the name is too long. 
+ */ +union record * +start_header (name, st) + char *name; + register struct stat *st; +{ + register union record *header; + + if (strlen (name) >= NAMSIZ) + write_long (name, LF_LONGNAME); + + header = (union record *) findrec (); + bzero (header->charptr, sizeof (*header)); /* XXX speed up */ + + /* + * Check the file name and put it in the record. + */ + if (!f_absolute_paths) + { + static int warned_once = 0; +#ifdef __MSDOS__ + if (name[1] == ':') + { + name += 2; + if (!warned_once++) + msg ("Removing drive spec from names in the archive"); + } +#endif + while ('/' == *name) + { + name++; /* Force relative path */ + if (!warned_once++) + msg ("Removing leading / from absolute path names in the archive."); + } + } + current_file_name = name; + strncpy (header->header.arch_name, name, NAMSIZ); + header->header.arch_name[NAMSIZ - 1] = '\0'; + + to_oct ((long) (f_oldarch ? (st->st_mode & 07777) : st->st_mode), + 8, header->header.mode); + to_oct ((long) st->st_uid, 8, header->header.uid); + to_oct ((long) st->st_gid, 8, header->header.gid); + to_oct ((long) st->st_size, 1 + 12, header->header.size); + to_oct ((long) st->st_mtime, 1 + 12, header->header.mtime); + /* header->header.linkflag is left as null */ + if (f_gnudump) + { + to_oct ((long) st->st_atime, 1 + 12, header->header.atime); + to_oct ((long) st->st_ctime, 1 + 12, header->header.ctime); + } + +#ifndef NONAMES + /* Fill in new Unix Standard fields if desired. */ + if (f_standard) + { + header->header.linkflag = LF_NORMAL; /* New default */ + strcpy (header->header.magic, TMAGIC); /* Mark as Unix Std */ + finduname (header->header.uname, st->st_uid); + findgname (header->header.gname, st->st_gid); + } +#endif + return header; +} + +/* + * Finish off a filled-in header block and write it out. + * We also print the file name and/or full info if verbose is on. 
+ */ +void +finish_header (header) + register union record *header; +{ + register int i, sum; + register char *p; + + bcopy (CHKBLANKS, header->header.chksum, sizeof (header->header.chksum)); + + sum = 0; + p = header->charptr; + for (i = sizeof (*header); --i >= 0;) + { + /* + * We can't use unsigned char here because of old compilers, + * e.g. V7. + */ + sum += 0xFF & *p++; + } + + /* + * Fill in the checksum field. It's formatted differently + * from the other fields: it has [6] digits, a null, then a + * space -- rather than digits, a space, then a null. + * We use to_oct then write the null in over to_oct's space. + * The final space is already there, from checksumming, and + * to_oct doesn't modify it. + * + * This is a fast way to do: + * (void) sprintf(header->header.chksum, "%6o", sum); + */ + to_oct ((long) sum, 8, header->header.chksum); + header->header.chksum[6] = '\0'; /* Zap the space */ + + userec (header); + + if (f_verbose) + { + extern union record *head;/* Points to current tape header */ + extern int head_standard; /* Tape header is in ANSI format */ + + /* These globals are parameters to print_header, sigh */ + head = header; + /* hstat is already set up */ + head_standard = f_standard; + print_header (); + } + + return; +} + + +/* + * Quick and dirty octal conversion. + * Converts long "value" into a "digs"-digit field at "where", + * including a trailing space and room for a null. "digs"==3 means + * 1 digit, a space, and room for a null. + * + * We assume the trailing null is already there and don't fill it in. + * This fact is used by start_header and finish_header, so don't change it! + * + * This should be equivalent to: + * (void) sprintf(where, "%*lo ", digs-2, value); + * except that sprintf fills in the trailing null and we don't. 
+ */ +void +to_oct (value, digs, where) + register long value; + register int digs; + register char *where; +{ + + --digs; /* Trailing null slot is left alone */ + where[--digs] = ' '; /* Put in the space, though */ + + /* Produce the digits -- at least one */ + do + { + where[--digs] = '0' + (char) (value & 7); /* one octal digit */ + value >>= 3; + } + while (digs > 0 && value != 0); + + /* Leading spaces, if necessary */ + while (digs > 0) + where[--digs] = ' '; + +} + + +/* + * Write the EOT record(s). + * We actually zero at least one record, through the end of the block. + * Old tar writes garbage after two zeroed records -- and PDtar used to. + */ +void +write_eot () +{ + union record *p; + int bufsize; + + p = findrec (); + if (p) + { + bufsize = endofrecs ()->charptr - p->charptr; + bzero (p->charptr, bufsize); + userec (p); + } +} + +/* Write a LF_LONGLINK or LF_LONGNAME record. */ +void +write_long (p, type) + char *p; + char type; +{ + int size = strlen (p) + 1; + int bufsize; + union record *header; + struct stat foo; + + + bzero (&foo, sizeof foo); + foo.st_size = size; + + header = start_header ("././@LongLink", &foo); + header->header.linkflag = type; + finish_header (header); + + header = findrec (); + + bufsize = endofrecs ()->charptr - header->charptr; + + while (bufsize < size) + { + bcopy (p, header->charptr, bufsize); + p += bufsize; + size -= bufsize; + userec (header + (bufsize - 1) / RECORDSIZE); + header = findrec (); + bufsize = endofrecs ()->charptr - header->charptr; + } + bcopy (p, header->charptr, size); + bzero (header->charptr + size, bufsize - size); + userec (header + (size - 1) / RECORDSIZE); +} diff --git a/gnu/usr.bin/tar/diffarch.c b/gnu/usr.bin/tar/diffarch.c new file mode 100644 index 0000000..ce47d9d --- /dev/null +++ b/gnu/usr.bin/tar/diffarch.c @@ -0,0 +1,759 @@ +/* Diff files from a tar archive. + Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. 
+ +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Diff files from a tar archive. + * + * Written 30 April 1987 by John Gilmore, ihnp4!hoptoad!gnu. + */ + +#include <stdio.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif +#include <sys/types.h> + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#ifdef HAVE_SYS_MTIO_H +#include <sys/ioctl.h> +#include <sys/mtio.h> +#endif + +#include "tar.h" +#include "port.h" +#include "rmt.h" + +#ifndef S_ISLNK +#define lstat stat +#endif + +extern void *valloc (); + +extern union record *head; /* Points to current tape header */ +extern struct stat hstat; /* Stat struct corresponding */ +extern int head_standard; /* Tape header is in ANSI format */ + +void decode_header (); +void diff_sparse_files (); +void fill_in_sparse_array (); +void fl_read (); +long from_oct (); +int do_stat (); +extern void print_header (); +int read_header (); +void saverec (); +void sigh (); +extern void skip_file (); +extern void skip_extended_headers (); +int wantbytes (); + +extern FILE *msg_file; + +int now_verifying = 0; /* Are we verifying at the moment? 
*/ + +int diff_fd; /* Descriptor of file we're diffing */ + +char *diff_buf = 0; /* Pointer to area for reading + file contents into */ + +char *diff_dir; /* Directory contents for LF_DUMPDIR */ + +int different = 0; + +/*struct sp_array *sparsearray; +int sp_ar_size = 10;*/ +/* + * Initialize for a diff operation + */ +void +diff_init () +{ + /*NOSTRICT*/ + diff_buf = (char *) valloc ((unsigned) blocksize); + if (!diff_buf) + { + msg ("could not allocate memory for diff buffer of %d bytes", + blocksize); + exit (EX_ARGSBAD); + } +} + +/* + * Diff a file against the archive. + */ +void +diff_archive () +{ + register char *data; + int check, namelen; + int err; + long offset; + struct stat filestat; + int compare_chunk (); + int compare_dir (); + int no_op (); +#ifndef __MSDOS__ + dev_t dev; + ino_t ino; +#endif + char *get_dir_contents (); + long from_oct (); + + errno = EPIPE; /* FIXME, remove perrors */ + + saverec (&head); /* Make sure it sticks around */ + userec (head); /* And go past it in the archive */ + decode_header (head, &hstat, &head_standard, 1); /* Snarf fields */ + + /* Print the record from 'head' and 'hstat' */ + if (f_verbose) + { + if (now_verifying) + fprintf (msg_file, "Verify "); + print_header (); + } + + switch (head->header.linkflag) + { + + default: + msg ("Unknown file type '%c' for %s, diffed as normal file", + head->header.linkflag, current_file_name); + /* FALL THRU */ + + case LF_OLDNORMAL: + case LF_NORMAL: + case LF_SPARSE: + case LF_CONTIG: + /* + * Appears to be a file. + * See if it's really a directory. 
+ */ + namelen = strlen (current_file_name) - 1; + if (current_file_name[namelen] == '/') + goto really_dir; + + + if (do_stat (&filestat)) + { + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) hstat.st_size); + different++; + goto quit; + } + + if (!S_ISREG (filestat.st_mode)) + { + fprintf (msg_file, "%s: not a regular file\n", + current_file_name); + skip_file ((long) hstat.st_size); + different++; + goto quit; + } + + filestat.st_mode &= 07777; + if (filestat.st_mode != hstat.st_mode) + sigh ("mode"); + if (filestat.st_uid != hstat.st_uid) + sigh ("uid"); + if (filestat.st_gid != hstat.st_gid) + sigh ("gid"); + if (filestat.st_mtime != hstat.st_mtime) + sigh ("mod time"); + if (head->header.linkflag != LF_SPARSE && + filestat.st_size != hstat.st_size) + { + sigh ("size"); + skip_file ((long) hstat.st_size); + goto quit; + } + + diff_fd = open (current_file_name, O_NDELAY | O_RDONLY | O_BINARY); + + if (diff_fd < 0 && !f_absolute_paths) + { + char tmpbuf[NAMSIZ + 2]; + + tmpbuf[0] = '/'; + strcpy (&tmpbuf[1], current_file_name); + diff_fd = open (tmpbuf, O_NDELAY | O_RDONLY); + } + if (diff_fd < 0) + { + msg_perror ("cannot open %s", current_file_name); + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) hstat.st_size); + different++; + goto quit; + } + /* + * Need to treat sparse files completely differently here. 
+ */ + if (head->header.linkflag == LF_SPARSE) + diff_sparse_files (hstat.st_size); + else + wantbytes ((long) (hstat.st_size), compare_chunk); + + check = close (diff_fd); + if (check < 0) + msg_perror ("Error while closing %s", current_file_name); + + quit: + break; + +#ifndef __MSDOS__ + case LF_LINK: + if (do_stat (&filestat)) + break; + dev = filestat.st_dev; + ino = filestat.st_ino; + err = stat (current_link_name, &filestat); + if (err < 0) + { + if (errno == ENOENT) + { + fprintf (msg_file, "%s: does not exist\n", current_file_name); + } + else + { + msg_perror ("cannot stat file %s", current_file_name); + } + different++; + break; + } + if (filestat.st_dev != dev || filestat.st_ino != ino) + { + fprintf (msg_file, "%s not linked to %s\n", current_file_name, current_link_name); + break; + } + break; +#endif + +#ifdef S_ISLNK + case LF_SYMLINK: + { + char linkbuf[NAMSIZ + 3]; + check = readlink (current_file_name, linkbuf, + (sizeof linkbuf) - 1); + + if (check < 0) + { + if (errno == ENOENT) + { + fprintf (msg_file, + "%s: no such file or directory\n", + current_file_name); + } + else + { + msg_perror ("cannot read link %s", current_file_name); + } + different++; + break; + } + + linkbuf[check] = '\0'; /* Null-terminate it */ + if (strncmp (current_link_name, linkbuf, check) != 0) + { + fprintf (msg_file, "%s: symlink differs\n", + current_link_name); + different++; + } + } + break; +#endif + +#ifdef S_IFCHR + case LF_CHR: + hstat.st_mode |= S_IFCHR; + goto check_node; +#endif + +#ifdef S_IFBLK + /* If local system doesn't support block devices, use default case */ + case LF_BLK: + hstat.st_mode |= S_IFBLK; + goto check_node; +#endif + +#ifdef S_ISFIFO + /* If local system doesn't support FIFOs, use default case */ + case LF_FIFO: +#ifdef S_IFIFO + hstat.st_mode |= S_IFIFO; +#endif + hstat.st_rdev = 0; /* FIXME, do we need this? 
*/ + goto check_node; +#endif + + check_node: + /* FIXME, deal with umask */ + if (do_stat (&filestat)) + break; + if (hstat.st_rdev != filestat.st_rdev) + { + fprintf (msg_file, "%s: device numbers changed\n", current_file_name); + different++; + break; + } +#ifdef S_IFMT + if (hstat.st_mode != filestat.st_mode) +#else /* POSIX lossage */ + if ((hstat.st_mode & 07777) != (filestat.st_mode & 07777)) +#endif + { + fprintf (msg_file, "%s: mode or device-type changed\n", current_file_name); + different++; + break; + } + break; + + case LF_DUMPDIR: + data = diff_dir = get_dir_contents (current_file_name, 0); + if (data) + { + wantbytes ((long) (hstat.st_size), compare_dir); + free (data); + } + else + wantbytes ((long) (hstat.st_size), no_op); + /* FALL THROUGH */ + + case LF_DIR: + /* Check for trailing / */ + namelen = strlen (current_file_name) - 1; + really_dir: + while (namelen && current_file_name[namelen] == '/') + current_file_name[namelen--] = '\0'; /* Zap / */ + + if (do_stat (&filestat)) + break; + if (!S_ISDIR (filestat.st_mode)) + { + fprintf (msg_file, "%s is no longer a directory\n", current_file_name); + different++; + break; + } + if ((filestat.st_mode & 07777) != (hstat.st_mode & 07777)) + sigh ("mode"); + break; + + case LF_VOLHDR: + break; + + case LF_MULTIVOL: + namelen = strlen (current_file_name) - 1; + if (current_file_name[namelen] == '/') + goto really_dir; + + if (do_stat (&filestat)) + break; + + if (!S_ISREG (filestat.st_mode)) + { + fprintf (msg_file, "%s: not a regular file\n", + current_file_name); + skip_file ((long) hstat.st_size); + different++; + break; + } + + filestat.st_mode &= 07777; + offset = from_oct (1 + 12, head->header.offset); + if (filestat.st_size != hstat.st_size + offset) + { + sigh ("size"); + skip_file ((long) hstat.st_size); + different++; + break; + } + + diff_fd = open (current_file_name, O_NDELAY | O_RDONLY | O_BINARY); + + if (diff_fd < 0) + { + msg_perror ("cannot open file %s", current_file_name); + skip_file 
((long) hstat.st_size); + different++; + break; + } + err = lseek (diff_fd, offset, 0); + if (err != offset) + { + msg_perror ("cannot seek to %ld in file %s", offset, current_file_name); + different++; + break; + } + + wantbytes ((long) (hstat.st_size), compare_chunk); + + check = close (diff_fd); + if (check < 0) + { + msg_perror ("Error while closing %s", current_file_name); + } + break; + + } + + /* We don't need to save it any longer. */ + saverec ((union record **) 0);/* Unsave it */ +} + +int +compare_chunk (bytes, buffer) + long bytes; + char *buffer; +{ + int err; + + err = read (diff_fd, diff_buf, bytes); + if (err != bytes) + { + if (err < 0) + { + msg_perror ("can't read %s", current_file_name); + } + else + { + fprintf (msg_file, "%s: could only read %d of %d bytes\n", current_file_name, err, bytes); + } + different++; + return -1; + } + if (bcmp (buffer, diff_buf, bytes)) + { + fprintf (msg_file, "%s: data differs\n", current_file_name); + different++; + return -1; + } + return 0; +} + +int +compare_dir (bytes, buffer) + long bytes; + char *buffer; +{ + if (bcmp (buffer, diff_dir, bytes)) + { + fprintf (msg_file, "%s: data differs\n", current_file_name); + different++; + return -1; + } + diff_dir += bytes; + return 0; +} + +/* + * Sigh about something that differs. + */ +void +sigh (what) + char *what; +{ + + fprintf (msg_file, "%s: %s differs\n", + current_file_name, what); +} + +void +verify_volume () +{ + int status; +#ifdef MTIOCTOP + struct mtop t; + int er; +#endif + + if (!diff_buf) + diff_init (); +#ifdef MTIOCTOP + t.mt_op = MTBSF; + t.mt_count = 1; + if ((er = rmtioctl (archive, MTIOCTOP, &t)) < 0) + { + if (errno != EIO || (er = rmtioctl (archive, MTIOCTOP, &t)) < 0) + { +#endif + if (rmtlseek (archive, 0L, 0) != 0) + { + /* Lseek failed. 
Try a different method */ + msg_perror ("Couldn't rewind archive file for verify"); + return; + } +#ifdef MTIOCTOP + } + } +#endif + ar_reading = 1; + now_verifying = 1; + fl_read (); + for (;;) + { + status = read_header (); + if (status == 0) + { + unsigned n; + + n = 0; + do + { + n++; + status = read_header (); + } + while (status == 0); + msg ("VERIFY FAILURE: %d invalid header%s detected!", n, n == 1 ? "" : "s"); + } + if (status == 2 || status == EOF) + break; + diff_archive (); + } + ar_reading = 0; + now_verifying = 0; + +} + +int +do_stat (statp) + struct stat *statp; +{ + int err; + + err = f_follow_links ? stat (current_file_name, statp) : lstat (current_file_name, statp); + if (err < 0) + { + if (errno == ENOENT) + { + fprintf (msg_file, "%s: does not exist\n", current_file_name); + } + else + msg_perror ("can't stat file %s", current_file_name); + /* skip_file((long)hstat.st_size); + different++;*/ + return 1; + } + else + return 0; +} + +/* + * JK + * Diff'ing a sparse file with its counterpart on the tar file is a + * bit of a different story than a normal file. First, we must know + * what areas of the file to skip through, i.e., we need to contruct + * a sparsearray, which will hold all the information we need. We must + * compare small amounts of data at a time as we find it. + */ + +void +diff_sparse_files (filesize) + int filesize; + +{ + int sparse_ind = 0; + char *buf; + int buf_size = RECORDSIZE; + union record *datarec; + int err; + long numbytes; + /* int amt_read = 0;*/ + int size = filesize; + + buf = (char *) ck_malloc (buf_size * sizeof (char)); + + fill_in_sparse_array (); + + + while (size > 0) + { + datarec = findrec (); + if (!sparsearray[sparse_ind].numbytes) + break; + + /* + * 'numbytes' is nicer to write than + * 'sparsearray[sparse_ind].numbytes' all the time ... 
+ */ + numbytes = sparsearray[sparse_ind].numbytes; + + lseek (diff_fd, sparsearray[sparse_ind].offset, 0); + /* + * take care to not run out of room in our buffer + */ + while (buf_size < numbytes) + { + buf = (char *) ck_realloc (buf, buf_size * 2 * sizeof (char)); + buf_size *= 2; + } + while (numbytes > RECORDSIZE) + { + if ((err = read (diff_fd, buf, RECORDSIZE)) != RECORDSIZE) + { + if (err < 0) + msg_perror ("can't read %s", current_file_name); + else + fprintf (msg_file, "%s: could only read %d of %d bytes\n", + current_file_name, err, numbytes); + break; + } + if (bcmp (buf, datarec->charptr, RECORDSIZE)) + { + different++; + break; + } + numbytes -= err; + size -= err; + userec (datarec); + datarec = findrec (); + } + if ((err = read (diff_fd, buf, numbytes)) != numbytes) + { + if (err < 0) + msg_perror ("can't read %s", current_file_name); + else + fprintf (msg_file, "%s: could only read %d of %d bytes\n", + current_file_name, err, numbytes); + break; + } + + if (bcmp (buf, datarec->charptr, numbytes)) + { + different++; + break; + } + /* amt_read += numbytes; + if (amt_read >= RECORDSIZE) { + amt_read = 0; + userec(datarec); + datarec = findrec(); + }*/ + userec (datarec); + sparse_ind++; + size -= numbytes; + } + /* + * if the number of bytes read isn't the + * number of bytes supposedly in the file, + * they're different + */ + /* if (amt_read != filesize) + different++;*/ + userec (datarec); + free (sparsearray); + if (different) + fprintf (msg_file, "%s: data differs\n", current_file_name); + +} + +/* + * JK + * This routine should be used more often than it is ... look into + * that. Anyhow, what it does is translate the sparse information + * on the header, and in any subsequent extended headers, into an + * array of structures with true numbers, as opposed to character + * strings. It simply makes our life much easier, doing so many + * comparisong and such. 
+ */ +void +fill_in_sparse_array () +{ + int ind; + + /* + * allocate space for our scratch space; it's initially + * 10 elements long, but can change in this routine if + * necessary + */ + sp_array_size = 10; + sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array)); + + /* + * there are at most five of these structures in the header + * itself; read these in first + */ + for (ind = 0; ind < SPARSE_IN_HDR; ind++) + { + if (!head->header.sp[ind].numbytes) + break; + sparsearray[ind].offset = + from_oct (1 + 12, head->header.sp[ind].offset); + sparsearray[ind].numbytes = + from_oct (1 + 12, head->header.sp[ind].numbytes); + } + /* + * if the header's extended, we gotta read in exhdr's till + * we're done + */ + if (head->header.isextended) + { + /* how far into the sparsearray we are 'so far' */ + static int so_far_ind = SPARSE_IN_HDR; + union record *exhdr; + + for (;;) + { + exhdr = findrec (); + for (ind = 0; ind < SPARSE_EXT_HDR; ind++) + { + if (ind + so_far_ind > sp_array_size - 1) + { + /* + * we just ran out of room in our + * scratch area - realloc it + */ + sparsearray = (struct sp_array *) + ck_realloc (sparsearray, + sp_array_size * 2 * sizeof (struct sp_array)); + sp_array_size *= 2; + } + /* + * convert the character strings into longs + */ + sparsearray[ind + so_far_ind].offset = + from_oct (1 + 12, exhdr->ext_hdr.sp[ind].offset); + sparsearray[ind + so_far_ind].numbytes = + from_oct (1 + 12, exhdr->ext_hdr.sp[ind].numbytes); + } + /* + * if this is the last extended header for this + * file, we can stop + */ + if (!exhdr->ext_hdr.isextended) + break; + else + { + so_far_ind += SPARSE_EXT_HDR; + userec (exhdr); + } + } + /* be sure to skip past the last one */ + userec (exhdr); + } +} diff --git a/gnu/usr.bin/tar/extract.c b/gnu/usr.bin/tar/extract.c new file mode 100644 index 0000000..d162cab --- /dev/null +++ b/gnu/usr.bin/tar/extract.c @@ -0,0 +1,907 @@ +/* Extract files from a tar archive. 
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Extract files from a tar archive. + * + * Written 19 Nov 1985 by John Gilmore, ihnp4!hoptoad!gnu. + */ + +#include <stdio.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif +#include <sys/types.h> +#include <time.h> +time_t time (); + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#ifdef NO_OPEN3 +/* We need the #define's even though we don't use them. 
*/ +#include "open3.h" +#endif + +#ifdef EMUL_OPEN3 +/* Simulated 3-argument open for systems that don't have it */ +#include "open3.h" +#endif + +#include "tar.h" +#include "port.h" + +#if defined(_POSIX_VERSION) +#include <utime.h> +#else +struct utimbuf +{ + long actime; + long modtime; +}; + +#endif + +extern FILE *msg_file; + +extern union record *head; /* Points to current tape header */ +extern struct stat hstat; /* Stat struct corresponding */ +extern int head_standard; /* Tape header is in ANSI format */ + +extern char *save_name; +extern long save_totsize; +extern long save_sizeleft; + +int confirm (); +void decode_header (); +void extract_mangle (); +void extract_sparse_file (); +long from_oct (); +void gnu_restore (); +extern void print_header (); +extern void skip_file (); +extern void skip_extended_headers (); +extern void pr_mkdir (); +void saverec (); + +int make_dirs (); /* Makes required directories */ + +static time_t now = 0; /* Current time */ +static we_are_root = 0; /* True if our effective uid == 0 */ +static int notumask = ~0; /* Masks out bits user doesn't want */ + +/* + * "Scratch" space to store the information about a sparse file before + * writing the info into the header or extended header + */ +/*struct sp_array *sparsearray;*/ + +/* number of elts storable in the sparsearray */ +/*int sp_array_size = 10;*/ + +struct saved_dir_info +{ + char *path; + int mode; + int atime; + int mtime; + struct saved_dir_info *next; +}; + +struct saved_dir_info *saved_dir_info_head; + +/* + * Set up to extract files. + */ +void +extr_init () +{ + int ourmask; + + now = time ((time_t *) 0); + if (geteuid () == 0) + we_are_root = 1; + + /* + * We need to know our umask. But if f_use_protection is set, + * leave our kernel umask at 0, and our "notumask" at ~0. 
+ */ + ourmask = umask (0); /* Read it */ + if (!f_use_protection) + { + (void) umask (ourmask); /* Set it back how it was */ + notumask = ~ourmask; /* Make umask override permissions */ + } +} + + +/* + * Extract a file from the archive. + */ +void +extract_archive () +{ + register char *data; + int fd, check, namelen, written, openflag; + long size; + struct utimbuf acc_upd_times; + register int skipcrud; + register int i; + /* int sparse_ind = 0;*/ + union record *exhdr; + struct saved_dir_info *tmp; + /* int end_nulls; */ + + saverec (&head); /* Make sure it sticks around */ + userec (head); /* And go past it in the archive */ + decode_header (head, &hstat, &head_standard, 1); /* Snarf fields */ + + if (f_confirm && !confirm ("extract", current_file_name)) + { + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) hstat.st_size); + saverec ((union record **) 0); + return; + } + + /* Print the record from 'head' and 'hstat' */ + if (f_verbose) + print_header (); + + /* + * Check for fully specified pathnames and other atrocities. + * + * Note, we can't just make a pointer to the new file name, + * since saverec() might move the header and adjust "head". + * We have to start from "head" every time we want to touch + * the header record. 
+ */ + skipcrud = 0; + while (!f_absolute_paths + && '/' == current_file_name[skipcrud]) + { + static int warned_once = 0; + + skipcrud++; /* Force relative path */ + if (!warned_once++) + { + msg ("Removing leading / from absolute path names in the archive."); + } + } + + switch (head->header.linkflag) + { + + default: + msg ("Unknown file type '%c' for %s, extracted as normal file", + head->header.linkflag, skipcrud + current_file_name); + /* FALL THRU */ + + /* + * JK - What we want to do if the file is sparse is loop through + * the array of sparse structures in the header and read in + * and translate the character strings representing 1) the offset + * at which to write and 2) how many bytes to write into numbers, + * which we store into the scratch array, "sparsearray". This + * array makes our life easier the same way it did in creating + * the tar file that had to deal with a sparse file. + * + * After we read in the first five (at most) sparse structures, + * we check to see if the file has an extended header, i.e., + * if more sparse structures are needed to describe the contents + * of the new file. If so, we read in the extended headers + * and continue to store their contents into the sparsearray. 
+ */ + case LF_SPARSE: + sp_array_size = 10; + sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array)); + for (i = 0; i < SPARSE_IN_HDR; i++) + { + sparsearray[i].offset = + from_oct (1 + 12, head->header.sp[i].offset); + sparsearray[i].numbytes = + from_oct (1 + 12, head->header.sp[i].numbytes); + if (!sparsearray[i].numbytes) + break; + } + + /* end_nulls = from_oct(1+12, head->header.ending_blanks);*/ + + if (head->header.isextended) + { + /* read in the list of extended headers + and translate them into the sparsearray + as before */ + + /* static */ int ind = SPARSE_IN_HDR; + + for (;;) + { + + exhdr = findrec (); + for (i = 0; i < SPARSE_EXT_HDR; i++) + { + + if (i + ind > sp_array_size - 1) + { + /* + * realloc the scratch area + * since we've run out of room -- + */ + sparsearray = (struct sp_array *) + ck_realloc (sparsearray, + 2 * sp_array_size * (sizeof (struct sp_array))); + sp_array_size *= 2; + } + if (!exhdr->ext_hdr.sp[i].numbytes) + break; + sparsearray[i + ind].offset = + from_oct (1 + 12, exhdr->ext_hdr.sp[i].offset); + sparsearray[i + ind].numbytes = + from_oct (1 + 12, exhdr->ext_hdr.sp[i].numbytes); + } + if (!exhdr->ext_hdr.isextended) + break; + else + { + ind += SPARSE_EXT_HDR; + userec (exhdr); + } + } + userec (exhdr); + } + + /* FALL THRU */ + case LF_OLDNORMAL: + case LF_NORMAL: + case LF_CONTIG: + /* + * Appears to be a file. + * See if it's really a directory. + */ + namelen = strlen (skipcrud + current_file_name) - 1; + if (current_file_name[skipcrud + namelen] == '/') + goto really_dir; + + /* FIXME, deal with protection issues */ + again_file: + openflag = (f_keep ? + O_BINARY | O_NDELAY | O_WRONLY | O_CREAT | O_EXCL : + O_BINARY | O_NDELAY | O_WRONLY | O_CREAT | O_TRUNC) + | ((head->header.linkflag == LF_SPARSE) ? 
0 : O_APPEND); + /* + * JK - The last | is a kludge to solve the problem + * the O_APPEND flag causes with files we are + * trying to make sparse: when a file is opened + * with O_APPEND, it writes to the last place + * that something was written, thereby ignoring + * any lseeks that we have done. We add this + * extra condition to make it able to lseek when + * a file is sparse, i.e., we don't open the new + * file with this flag. (Grump -- this bug caused + * me to waste a good deal of time, I might add) + */ + + if (f_exstdout) + { + fd = 1; + goto extract_file; + } +#ifdef O_CTG + /* + * Contiguous files (on the Masscomp) have to specify + * the size in the open call that creates them. + */ + if (head->header.linkflag == LF_CONTIG) + fd = open ((longname ? longname : head->header.name) + + skipcrud, + openflag | O_CTG, + hstat.st_mode, hstat.st_size); + else +#endif + { +#ifdef NO_OPEN3 + /* + * On raw V7 we won't let them specify -k (f_keep), but + * we just bull ahead and create the files. + */ + fd = creat ((longname + ? longname + : head->header.name) + skipcrud, + hstat.st_mode); +#else + /* + * With 3-arg open(), we can do this up right. + */ + fd = open (skipcrud + current_file_name, + openflag, hstat.st_mode); +#endif + } + + if (fd < 0) + { + if (make_dirs (skipcrud + current_file_name)) + goto again_file; + msg_perror ("Could not create file %s", + skipcrud + current_file_name); + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) hstat.st_size); + goto quit; + } + + extract_file: + if (head->header.linkflag == LF_SPARSE) + { + char *name; + int namelen; + + /* + * Kludge alert. NAME is assigned to header.name + * because during the extraction, the space that + * contains the header will get scribbled on, and + * the name will get munged, so any error messages + * that happen to contain the filename will look + * REAL interesting unless we do this. 
+ */ + namelen = strlen (skipcrud + current_file_name) + 1; + name = (char *) ck_malloc ((sizeof (char)) * namelen); + bcopy (skipcrud + current_file_name, name, namelen); + size = hstat.st_size; + extract_sparse_file (fd, &size, hstat.st_size, name); + } + else + for (size = hstat.st_size; + size > 0; + size -= written) + { + + /* long offset, + numbytes;*/ + + if (f_multivol) + { + save_name = current_file_name; + save_totsize = hstat.st_size; + save_sizeleft = size; + } + + /* + * Locate data, determine max length + * writeable, write it, record that + * we have used the data, then check + * if the write worked. + */ + data = findrec ()->charptr; + if (data == NULL) + { /* Check it... */ + msg ("Unexpected EOF on archive file"); + break; + } + /* + * JK - If the file is sparse, use the sparsearray + * that we created before to lseek into the new + * file the proper amount, and to see how many + * bytes we want to write at that position. + */ + /* if (head->header.linkflag == LF_SPARSE) { + off_t pos; + + pos = lseek(fd, (off_t) sparsearray[sparse_ind].offset, 0); + printf("%d at %d\n", (int) pos, sparse_ind); + written = sparsearray[sparse_ind++].numbytes; + } else*/ + written = endofrecs ()->charptr - data; + if (written > size) + written = size; + errno = 0; + check = write (fd, data, written); + /* + * The following is in violation of strict + * typing, since the arg to userec + * should be a struct rec *. FIXME. + */ + userec ((union record *) (data + written - 1)); + if (check == written) + continue; + /* + * Error in writing to file. + * Print it, skip to next file in archive. 
+ */ + if (check < 0) + msg_perror ("couldn't write to file %s", + skipcrud + current_file_name); + else + msg ("could only write %d of %d bytes to file %s", + check, written, skipcrud + current_file_name); + skip_file ((long) (size - written)); + break; /* Still do the close, mod time, chmod, etc */ + } + + if (f_multivol) + save_name = 0; + + /* If writing to stdout, don't try to do anything + to the filename; it doesn't exist, or we don't + want to touch it anyway */ + if (f_exstdout) + break; + + /* if (head->header.isextended) { + register union record *exhdr; + register int i; + + for (i = 0; i < 21; i++) { + long offset; + + if (!exhdr->ext_hdr.sp[i].numbytes) + break; + offset = from_oct(1+12, + exhdr->ext_hdr.sp[i].offset); + written = from_oct(1+12, + exhdr->ext_hdr.sp[i].numbytes); + lseek(fd, offset, 0); + check = write(fd, data, written); + if (check == written) continue; + + } + + + }*/ + check = close (fd); + if (check < 0) + { + msg_perror ("Error while closing %s", + skipcrud + current_file_name); + } + + + set_filestat: + + /* + * If we are root, set the owner and group of the extracted + * file. This does what is wanted both on real Unix and on + * System V. If we are running as a user, we extract as that + * user; if running as root, we extract as the original owner. + */ + if (we_are_root || f_do_chown) + { + if (chown (skipcrud + current_file_name, + hstat.st_uid, hstat.st_gid) < 0) + { + msg_perror ("cannot chown file %s to uid %d gid %d", + skipcrud + current_file_name, + hstat.st_uid, hstat.st_gid); + } + } + + /* + * Set the modified time of the file. + * + * Note that we set the accessed time to "now", which + * is really "the time we started extracting files". + * unless f_gnudump is used, in which case .st_atime is used + */ + if (!f_modified) + { + /* fixme if f_gnudump should set ctime too, but how? 
*/ + if (f_gnudump) + acc_upd_times.actime = hstat.st_atime; + else + acc_upd_times.actime = now; /* Accessed now */ + acc_upd_times.modtime = hstat.st_mtime; /* Mod'd */ + if (utime (skipcrud + current_file_name, + &acc_upd_times) < 0) + { + msg_perror ("couldn't change access and modification times of %s", skipcrud + current_file_name); + } + } + /* We do the utime before the chmod because some versions of + utime are broken and trash the modes of the file. Since + we then change the mode anyway, we don't care. . . */ + + /* + * If '-k' is not set, open() or creat() could have saved + * the permission bits from a previously created file, + * ignoring the ones we specified. + * Even if -k is set, if the file has abnormal + * mode bits, we must chmod since writing or chown() has + * probably reset them. + * + * If -k is set, we know *we* created this file, so the mode + * bits were set by our open(). If the file is "normal", we + * skip the chmod. This works because we did umask(0) if -p + * is set, so umask will have left the specified mode alone. 
+ */ + if ((!f_keep) + || (hstat.st_mode & (S_ISUID | S_ISGID | S_ISVTX))) + { + if (chmod (skipcrud + current_file_name, + notumask & (int) hstat.st_mode) < 0) + { + msg_perror ("cannot change mode of file %s to %ld", + skipcrud + current_file_name, + notumask & (int) hstat.st_mode); + } + } + + quit: + break; + + case LF_LINK: + again_link: + { + struct stat st1, st2; + + check = link (current_link_name, skipcrud + current_file_name); + + if (check == 0) + break; + if (make_dirs (skipcrud + current_file_name)) + goto again_link; + if (f_gnudump && errno == EEXIST) + break; + if (stat (current_link_name, &st1) == 0 + && stat (current_file_name + skipcrud, &st2) == 0 + && st1.st_dev == st2.st_dev + && st1.st_ino == st2.st_ino) + break; + msg_perror ("Could not link %s to %s", + skipcrud + current_file_name, + current_link_name); + } + break; + +#ifdef S_ISLNK + case LF_SYMLINK: + again_symlink: + check = symlink (current_link_name, + skipcrud + current_file_name); + /* FIXME, don't worry uid, gid, etc... 
*/ + if (check == 0) + break; + if (make_dirs (current_file_name + skipcrud)) + goto again_symlink; + msg_perror ("Could not create symlink to %s", + current_link_name); + break; +#endif + +#ifdef S_IFCHR + case LF_CHR: + hstat.st_mode |= S_IFCHR; + goto make_node; +#endif + +#ifdef S_IFBLK + case LF_BLK: + hstat.st_mode |= S_IFBLK; +#endif +#if defined(S_IFCHR) || defined(S_IFBLK) + make_node: + check = mknod (current_file_name + skipcrud, + (int) hstat.st_mode, (int) hstat.st_rdev); + if (check != 0) + { + if (make_dirs (skipcrud + current_file_name)) + goto make_node; + msg_perror ("Could not make %s", + current_file_name + skipcrud); + break; + }; + goto set_filestat; +#endif + +#ifdef S_ISFIFO + /* If local system doesn't support FIFOs, use default case */ + case LF_FIFO: + make_fifo: + check = mkfifo (current_file_name + skipcrud, + (int) hstat.st_mode); + if (check != 0) + { + if (make_dirs (current_file_name + skipcrud)) + goto make_fifo; + msg_perror ("Could not make %s", + skipcrud + current_file_name); + break; + }; + goto set_filestat; +#endif + + case LF_DIR: + case LF_DUMPDIR: + namelen = strlen (current_file_name + skipcrud) - 1; + really_dir: + /* Check for trailing /, and zap as many as we find. */ + while (namelen + && current_file_name[skipcrud + namelen] == '/') + current_file_name[skipcrud + namelen--] = '\0'; + if (f_gnudump) + { /* Read the entry and delete files + that aren't listed in the archive */ + gnu_restore (skipcrud); + + } + else if (head->header.linkflag == LF_DUMPDIR) + skip_file ((long) (hstat.st_size)); + + + again_dir: + check = mkdir (skipcrud + current_file_name, + (we_are_root ? 0 : 0300) | (int) hstat.st_mode); + if (check != 0) + { + struct stat st1; + + if (make_dirs (skipcrud + current_file_name)) + goto again_dir; + /* If we're trying to create '.', let it be. */ + if (current_file_name[skipcrud + namelen] == '.' 
&& + (namelen == 0 || + current_file_name[skipcrud + namelen - 1] == '/')) + goto check_perms; + if (errno == EEXIST + && stat (skipcrud + current_file_name, &st1) == 0 + && (S_ISDIR (st1.st_mode))) + break; + msg_perror ("Could not create directory %s", skipcrud + current_file_name); + break; + } + + check_perms: + if (!we_are_root && 0300 != (0300 & (int) hstat.st_mode)) + { + hstat.st_mode |= 0300; + msg ("Added write and execute permission to directory %s", + skipcrud + current_file_name); + } + + /* + * If we are root, set the owner and group of the extracted + * file. This does what is wanted both on real Unix and on + * System V. If we are running as a user, we extract as that + * user; if running as root, we extract as the original owner. + */ + if (we_are_root || f_do_chown) + { + if (chown (skipcrud + current_file_name, + hstat.st_uid, hstat.st_gid) < 0) + { + msg_perror ("cannot chown file %s to uid %d gid %d", + skipcrud + current_file_name, + hstat.st_uid, hstat.st_gid); + } + } + + if (!f_modified) + { + tmp = ((struct saved_dir_info *) + ck_malloc (sizeof (struct saved_dir_info))); + tmp->path = (char *) ck_malloc (strlen (skipcrud + + current_file_name) + 1); + strcpy (tmp->path, skipcrud + current_file_name); + tmp->mode = hstat.st_mode; + tmp->atime = hstat.st_atime; + tmp->mtime = hstat.st_mtime; + tmp->next = saved_dir_info_head; + saved_dir_info_head = tmp; + } + else + { + /* This functions exactly as the code for set_filestat above. 
*/ + if ((!f_keep) + || (hstat.st_mode & (S_ISUID | S_ISGID | S_ISVTX))) + { + if (chmod (skipcrud + current_file_name, + notumask & (int) hstat.st_mode) < 0) + { + msg_perror ("cannot change mode of file %s to %ld", + skipcrud + current_file_name, + notumask & (int) hstat.st_mode); + } + } + } + break; + + case LF_VOLHDR: + if (f_verbose) + { + printf ("Reading %s\n", current_file_name); + } + break; + + case LF_NAMES: + extract_mangle (head); + break; + + case LF_MULTIVOL: + msg ("Can't extract '%s'--file is continued from another volume\n", current_file_name); + skip_file ((long) hstat.st_size); + break; + + case LF_LONGNAME: + case LF_LONGLINK: + msg ("Visible long name error\n"); + skip_file ((long) hstat.st_size); + break; + } + + /* We don't need to save it any longer. */ + saverec ((union record **) 0);/* Unsave it */ +} + +/* + * After a file/link/symlink/dir creation has failed, see if + * it's because some required directory was not present, and if + * so, create all required dirs. + */ +int +make_dirs (pathname) + char *pathname; +{ + char *p; /* Points into path */ + int madeone = 0; /* Did we do anything yet? */ + int save_errno = errno; /* Remember caller's errno */ + int check; + + if (errno != ENOENT) + return 0; /* Not our problem */ + + for (p = index (pathname, '/'); p != NULL; p = index (p + 1, '/')) + { + /* Avoid mkdir of empty string, if leading or double '/' */ + if (p == pathname || p[-1] == '/') + continue; + /* Avoid mkdir where last part of path is '.' */ + if (p[-1] == '.' 
&& (p == pathname + 1 || p[-2] == '/')) + continue; + *p = 0; /* Truncate the path there */ + check = mkdir (pathname, 0777); /* Try to create it as a dir */ + if (check == 0) + { + /* Fix ownership */ + if (we_are_root) + { + if (chown (pathname, hstat.st_uid, + hstat.st_gid) < 0) + { + msg_perror ("cannot change owner of %s to uid %d gid %d", pathname, hstat.st_uid, hstat.st_gid); + } + } + pr_mkdir (pathname, p - pathname, notumask & 0777); + madeone++; /* Remember if we made one */ + *p = '/'; + continue; + } + *p = '/'; + if (errno == EEXIST) /* Directory already exists */ + continue; + /* + * Some other error in the mkdir. We return to the caller. + */ + break; + } + + errno = save_errno; /* Restore caller's errno */ + return madeone; /* Tell them to retry if we made one */ +} + +void +extract_sparse_file (fd, sizeleft, totalsize, name) + int fd; + long *sizeleft, totalsize; + char *name; +{ + /* register char *data;*/ + union record *datarec; + int sparse_ind = 0; + int written, count; + + /* assuming sizeleft is initially totalsize */ + + + while (*sizeleft > 0) + { + datarec = findrec (); + if (datarec == NULL) + { + msg ("Unexpected EOF on archive file"); + return; + } + lseek (fd, sparsearray[sparse_ind].offset, 0); + written = sparsearray[sparse_ind++].numbytes; + while (written > RECORDSIZE) + { + count = write (fd, datarec->charptr, RECORDSIZE); + if (count < 0) + msg_perror ("couldn't write to file %s", name); + written -= count; + *sizeleft -= count; + userec (datarec); + datarec = findrec (); + } + + count = write (fd, datarec->charptr, written); + + if (count < 0) + { + msg_perror ("couldn't write to file %s", name); + } + else if (count != written) + { + msg ("could only write %d of %d bytes to file %s", count, + totalsize, name); + skip_file ((long) (*sizeleft)); + } + + written -= count; + *sizeleft -= count; + userec (datarec); + } + free (sparsearray); + /* if (end_nulls) { + register int i; + + printf("%d\n", (int) end_nulls); + for (i = 0; 
i < end_nulls; i++) + write(fd, "\000", 1); + }*/ + userec (datarec); +} + +/* Set back the utime and mode for all the extracted directories. */ +void +restore_saved_dir_info () +{ + struct utimbuf acc_upd_times; + + while (saved_dir_info_head != NULL) + { + /* fixme if f_gnudump should set ctime too, but how? */ + if (f_gnudump) + acc_upd_times.actime = saved_dir_info_head->atime; + else + acc_upd_times.actime = now; /* Accessed now */ + acc_upd_times.modtime = saved_dir_info_head->mtime; /* Mod'd */ + if (utime (saved_dir_info_head->path, &acc_upd_times) < 0) + { + msg_perror ("couldn't change access and modification times of %s", + saved_dir_info_head->path); + } + if ((!f_keep) || (saved_dir_info_head->mode & (S_ISUID | S_ISGID | S_ISVTX))) + { + if (chmod (saved_dir_info_head->path, + notumask & saved_dir_info_head->mode) < 0) + { + msg_perror ("cannot change mode of file %s to %ld", + saved_dir_info_head->path, + notumask & saved_dir_info_head->mode); + } + } + saved_dir_info_head = saved_dir_info_head->next; + } +} diff --git a/gnu/usr.bin/tar/getdate.y b/gnu/usr.bin/tar/getdate.y new file mode 100644 index 0000000..7b0ac79 --- /dev/null +++ b/gnu/usr.bin/tar/getdate.y @@ -0,0 +1,969 @@ +%{ +/* $Revision: 2.1 $ +** +** Originally written by Steven M. Bellovin <smb@research.att.com> while +** at the University of North Carolina at Chapel Hill. Later tweaked by +** a couple of people on Usenet. Completely overhauled by Rich $alz +** <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990; +** send any email to Rich. +** +** This grammar has eight shift/reduce conflicts. +** +** This code is in the public domain and has no copyright. 
+*/ +/* SUPPRESS 287 on yaccpar_sccsid *//* Unused static variable */ +/* SUPPRESS 288 on yyerrlab *//* Label unused */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#else +#ifdef _AIX /* for Bison */ + #pragma alloca +#else +char *alloca (); +#endif +#endif +#endif + +#include <stdio.h> +#include <ctype.h> + +/* The code at the top of get_date which figures out the offset of the + current time zone checks various CPP symbols to see if special + tricks are needed, but defaults to using the gettimeofday system call. + Include <sys/time.h> if that will be used. */ + +#if !defined (USG) && !defined (sgi) && !defined (__386BSD__) +#include <sys/time.h> +#endif + +#if defined(vms) + +#include <types.h> +#include <time.h> + +#else + +#include <sys/types.h> + +#if defined(USG) || !defined(HAVE_FTIME) +/* +** If you need to do a tzset() call to set the +** timezone, and don't have ftime(). 
+*/ +struct timeb { + time_t time; /* Seconds since the epoch */ + unsigned short millitm; /* Field not used */ + short timezone; + short dstflag; /* Field not used */ +}; + +#else + +#include <sys/timeb.h> + +#endif /* defined(USG) && !defined(HAVE_FTIME) */ + +#if defined(BSD4_2) || defined(BSD4_1C) || (defined (hp9000) && !defined (hpux)) +#include <sys/time.h> +#else +#if defined(_AIX) +#include <sys/time.h> +#endif +#include <time.h> +#endif /* defined(BSD4_2) */ + +#endif /* defined(vms) */ + +#if defined (STDC_HEADERS) || defined (USG) +#include <string.h> +#endif + +#if sgi +#undef timezone +#endif + +extern struct tm *localtime(); + +#define yyparse getdate_yyparse +#define yylex getdate_yylex +#define yyerror getdate_yyerror + +#if !defined(lint) && !defined(SABER) +static char RCS[] = + "$Header: str2date.y,v 2.1 90/09/06 08:15:06 cronan Exp $"; +#endif /* !defined(lint) && !defined(SABER) */ + + +#define EPOCH 1970 +#define HOUR(x) ((time_t)(x) * 60) +#define SECSPERDAY (24L * 60L * 60L) + + +/* +** An entry in the lexical lookup table. +*/ +typedef struct _TABLE { + char *name; + int type; + time_t value; +} TABLE; + + +/* +** Daylight-savings mode: on, off, or not yet known. +*/ +typedef enum _DSTMODE { + DSTon, DSToff, DSTmaybe +} DSTMODE; + +/* +** Meridian: am, pm, or 24-hour style. +*/ +typedef enum _MERIDIAN { + MERam, MERpm, MER24 +} MERIDIAN; + + +/* +** Global variables. We could get rid of most of these by using a good +** union as the yacc stack. (This routine was originally written before +** yacc had the %union construct.) Maybe someday; right now we only use +** the %union very rarely. 
+*/ +static char *yyInput; +static DSTMODE yyDSTmode; +static time_t yyDayOrdinal; +static time_t yyDayNumber; +static int yyHaveDate; +static int yyHaveDay; +static int yyHaveRel; +static int yyHaveTime; +static int yyHaveZone; +static time_t yyTimezone; +static time_t yyDay; +static time_t yyHour; +static time_t yyMinutes; +static time_t yyMonth; +static time_t yySeconds; +static time_t yyYear; +static MERIDIAN yyMeridian; +static time_t yyRelMonth; +static time_t yyRelSeconds; + +%} + +%union { + time_t Number; + enum _MERIDIAN Meridian; +} + +%token tAGO tDAY tDAYZONE tID tMERIDIAN tMINUTE_UNIT tMONTH tMONTH_UNIT +%token tSEC_UNIT tSNUMBER tUNUMBER tZONE tDST + +%type <Number> tDAY tDAYZONE tMINUTE_UNIT tMONTH tMONTH_UNIT +%type <Number> tSEC_UNIT tSNUMBER tUNUMBER tZONE +%type <Meridian> tMERIDIAN o_merid + +%% + +spec : /* NULL */ + | spec item + ; + +item : time { + yyHaveTime++; + } + | zone { + yyHaveZone++; + } + | date { + yyHaveDate++; + } + | day { + yyHaveDay++; + } + | rel { + yyHaveRel++; + } + | number + ; + +time : tUNUMBER tMERIDIAN { + yyHour = $1; + yyMinutes = 0; + yySeconds = 0; + yyMeridian = $2; + } + | tUNUMBER ':' tUNUMBER o_merid { + yyHour = $1; + yyMinutes = $3; + yySeconds = 0; + yyMeridian = $4; + } + | tUNUMBER ':' tUNUMBER tSNUMBER { + yyHour = $1; + yyMinutes = $3; + yyMeridian = MER24; + yyDSTmode = DSToff; + yyTimezone = - ($4 % 100 + ($4 / 100) * 60); + } + | tUNUMBER ':' tUNUMBER ':' tUNUMBER o_merid { + yyHour = $1; + yyMinutes = $3; + yySeconds = $5; + yyMeridian = $6; + } + | tUNUMBER ':' tUNUMBER ':' tUNUMBER tSNUMBER { + yyHour = $1; + yyMinutes = $3; + yySeconds = $5; + yyMeridian = MER24; + yyDSTmode = DSToff; + yyTimezone = - ($6 % 100 + ($6 / 100) * 60); + } + ; + +zone : tZONE { + yyTimezone = $1; + yyDSTmode = DSToff; + } + | tDAYZONE { + yyTimezone = $1; + yyDSTmode = DSTon; + } + | + tZONE tDST { + yyTimezone = $1; + yyDSTmode = DSTon; + } + ; + +day : tDAY { + yyDayOrdinal = 1; + yyDayNumber = $1; + } + | tDAY 
',' { + yyDayOrdinal = 1; + yyDayNumber = $1; + } + | tUNUMBER tDAY { + yyDayOrdinal = $1; + yyDayNumber = $2; + } + ; + +date : tUNUMBER '/' tUNUMBER { + yyMonth = $1; + yyDay = $3; + } + | tUNUMBER '/' tUNUMBER '/' tUNUMBER { + yyMonth = $1; + yyDay = $3; + yyYear = $5; + } + | tUNUMBER tSNUMBER tSNUMBER { + /* ISO 8601 format. yyyy-mm-dd. */ + yyYear = $1; + yyMonth = -$2; + yyDay = -$3; + } + | tMONTH tUNUMBER { + yyMonth = $1; + yyDay = $2; + } + | tMONTH tUNUMBER ',' tUNUMBER { + yyMonth = $1; + yyDay = $2; + yyYear = $4; + } + | tUNUMBER tMONTH { + yyMonth = $2; + yyDay = $1; + } + | tUNUMBER tMONTH tUNUMBER { + yyMonth = $2; + yyDay = $1; + yyYear = $3; + } + ; + +rel : relunit tAGO { + yyRelSeconds = -yyRelSeconds; + yyRelMonth = -yyRelMonth; + } + | relunit + ; + +relunit : tUNUMBER tMINUTE_UNIT { + yyRelSeconds += $1 * $2 * 60L; + } + | tSNUMBER tMINUTE_UNIT { + yyRelSeconds += $1 * $2 * 60L; + } + | tMINUTE_UNIT { + yyRelSeconds += $1 * 60L; + } + | tSNUMBER tSEC_UNIT { + yyRelSeconds += $1; + } + | tUNUMBER tSEC_UNIT { + yyRelSeconds += $1; + } + | tSEC_UNIT { + yyRelSeconds++; + } + | tSNUMBER tMONTH_UNIT { + yyRelMonth += $1 * $2; + } + | tUNUMBER tMONTH_UNIT { + yyRelMonth += $1 * $2; + } + | tMONTH_UNIT { + yyRelMonth += $1; + } + ; + +number : tUNUMBER { + if (yyHaveTime && yyHaveDate && !yyHaveRel) + yyYear = $1; + else { + if($1>10000) { + time_t date_part; + + date_part= $1/10000; + yyHaveDate++; + yyDay= (date_part)%100; + yyMonth= (date_part/100)%100; + yyYear = date_part/10000; + } + yyHaveTime++; + if ($1 < 100) { + yyHour = $1; + yyMinutes = 0; + } + else { + yyHour = $1 / 100; + yyMinutes = $1 % 100; + } + yySeconds = 0; + yyMeridian = MER24; + } + } + ; + +o_merid : /* NULL */ { + $$ = MER24; + } + | tMERIDIAN { + $$ = $1; + } + ; + +%% + +/* Month and day table. 
*/ +static TABLE const MonthDayTable[] = { + { "january", tMONTH, 1 }, + { "february", tMONTH, 2 }, + { "march", tMONTH, 3 }, + { "april", tMONTH, 4 }, + { "may", tMONTH, 5 }, + { "june", tMONTH, 6 }, + { "july", tMONTH, 7 }, + { "august", tMONTH, 8 }, + { "september", tMONTH, 9 }, + { "sept", tMONTH, 9 }, + { "october", tMONTH, 10 }, + { "november", tMONTH, 11 }, + { "december", tMONTH, 12 }, + { "sunday", tDAY, 0 }, + { "monday", tDAY, 1 }, + { "tuesday", tDAY, 2 }, + { "tues", tDAY, 2 }, + { "wednesday", tDAY, 3 }, + { "wednes", tDAY, 3 }, + { "thursday", tDAY, 4 }, + { "thur", tDAY, 4 }, + { "thurs", tDAY, 4 }, + { "friday", tDAY, 5 }, + { "saturday", tDAY, 6 }, + { NULL } +}; + +/* Time units table. */ +static TABLE const UnitsTable[] = { + { "year", tMONTH_UNIT, 12 }, + { "month", tMONTH_UNIT, 1 }, + { "fortnight", tMINUTE_UNIT, 14 * 24 * 60 }, + { "week", tMINUTE_UNIT, 7 * 24 * 60 }, + { "day", tMINUTE_UNIT, 1 * 24 * 60 }, + { "hour", tMINUTE_UNIT, 60 }, + { "minute", tMINUTE_UNIT, 1 }, + { "min", tMINUTE_UNIT, 1 }, + { "second", tSEC_UNIT, 1 }, + { "sec", tSEC_UNIT, 1 }, + { NULL } +}; + +/* Assorted relative-time words. */ +static TABLE const OtherTable[] = { + { "tomorrow", tMINUTE_UNIT, 1 * 24 * 60 }, + { "yesterday", tMINUTE_UNIT, -1 * 24 * 60 }, + { "today", tMINUTE_UNIT, 0 }, + { "now", tMINUTE_UNIT, 0 }, + { "last", tUNUMBER, -1 }, + { "this", tMINUTE_UNIT, 0 }, + { "next", tUNUMBER, 2 }, + { "first", tUNUMBER, 1 }, +/* { "second", tUNUMBER, 2 }, */ + { "third", tUNUMBER, 3 }, + { "fourth", tUNUMBER, 4 }, + { "fifth", tUNUMBER, 5 }, + { "sixth", tUNUMBER, 6 }, + { "seventh", tUNUMBER, 7 }, + { "eighth", tUNUMBER, 8 }, + { "ninth", tUNUMBER, 9 }, + { "tenth", tUNUMBER, 10 }, + { "eleventh", tUNUMBER, 11 }, + { "twelfth", tUNUMBER, 12 }, + { "ago", tAGO, 1 }, + { NULL } +}; + +/* The timezone table. */ +/* Some of these are commented out because a time_t can't store a float. 
*/ +static TABLE const TimezoneTable[] = { + { "gmt", tZONE, HOUR( 0) }, /* Greenwich Mean */ + { "ut", tZONE, HOUR( 0) }, /* Universal (Coordinated) */ + { "utc", tZONE, HOUR( 0) }, + { "wet", tZONE, HOUR( 0) }, /* Western European */ + { "bst", tDAYZONE, HOUR( 0) }, /* British Summer */ + { "wat", tZONE, HOUR( 1) }, /* West Africa */ + { "at", tZONE, HOUR( 2) }, /* Azores */ +#if 0 + /* For completeness. BST is also British Summer, and GST is + * also Guam Standard. */ + { "bst", tZONE, HOUR( 3) }, /* Brazil Standard */ + { "gst", tZONE, HOUR( 3) }, /* Greenland Standard */ +#endif +#if 0 + { "nft", tZONE, HOUR(3.5) }, /* Newfoundland */ + { "nst", tZONE, HOUR(3.5) }, /* Newfoundland Standard */ + { "ndt", tDAYZONE, HOUR(3.5) }, /* Newfoundland Daylight */ +#endif + { "ast", tZONE, HOUR( 4) }, /* Atlantic Standard */ + { "adt", tDAYZONE, HOUR( 4) }, /* Atlantic Daylight */ + { "est", tZONE, HOUR( 5) }, /* Eastern Standard */ + { "edt", tDAYZONE, HOUR( 5) }, /* Eastern Daylight */ + { "cst", tZONE, HOUR( 6) }, /* Central Standard */ + { "cdt", tDAYZONE, HOUR( 6) }, /* Central Daylight */ + { "mst", tZONE, HOUR( 7) }, /* Mountain Standard */ + { "mdt", tDAYZONE, HOUR( 7) }, /* Mountain Daylight */ + { "pst", tZONE, HOUR( 8) }, /* Pacific Standard */ + { "pdt", tDAYZONE, HOUR( 8) }, /* Pacific Daylight */ + { "yst", tZONE, HOUR( 9) }, /* Yukon Standard */ + { "ydt", tDAYZONE, HOUR( 9) }, /* Yukon Daylight */ + { "hst", tZONE, HOUR(10) }, /* Hawaii Standard */ + { "hdt", tDAYZONE, HOUR(10) }, /* Hawaii Daylight */ + { "cat", tZONE, HOUR(10) }, /* Central Alaska */ + { "ahst", tZONE, HOUR(10) }, /* Alaska-Hawaii Standard */ + { "nt", tZONE, HOUR(11) }, /* Nome */ + { "idlw", tZONE, HOUR(12) }, /* International Date Line West */ + { "cet", tZONE, -HOUR(1) }, /* Central European */ + { "met", tZONE, -HOUR(1) }, /* Middle European */ + { "mewt", tZONE, -HOUR(1) }, /* Middle European Winter */ + { "mest", tDAYZONE, -HOUR(1) }, /* Middle European Summer */ + { "swt", 
tZONE, -HOUR(1) }, /* Swedish Winter */ + { "sst", tDAYZONE, -HOUR(1) }, /* Swedish Summer */ + { "fwt", tZONE, -HOUR(1) }, /* French Winter */ + { "fst", tDAYZONE, -HOUR(1) }, /* French Summer */ + { "eet", tZONE, -HOUR(2) }, /* Eastern Europe, USSR Zone 1 */ + { "bt", tZONE, -HOUR(3) }, /* Baghdad, USSR Zone 2 */ +#if 0 + { "it", tZONE, -HOUR(3.5) },/* Iran */ +#endif + { "zp4", tZONE, -HOUR(4) }, /* USSR Zone 3 */ + { "zp5", tZONE, -HOUR(5) }, /* USSR Zone 4 */ +#if 0 + { "ist", tZONE, -HOUR(5.5) },/* Indian Standard */ +#endif + { "zp6", tZONE, -HOUR(6) }, /* USSR Zone 5 */ +#if 0 + /* For completeness. NST is also Newfoundland Standard, and SST is + * also Swedish Summer. */ + { "nst", tZONE, -HOUR(6.5) },/* North Sumatra */ + { "sst", tZONE, -HOUR(7) }, /* South Sumatra, USSR Zone 6 */ +#endif /* 0 */ + { "wast", tZONE, -HOUR(7) }, /* West Australian Standard */ + { "wadt", tDAYZONE, -HOUR(7) }, /* West Australian Daylight */ +#if 0 + { "jt", tZONE, -HOUR(7.5) },/* Java (3pm in Cronusland!) */ +#endif + { "cct", tZONE, -HOUR(8) }, /* China Coast, USSR Zone 7 */ + { "jst", tZONE, -HOUR(9) }, /* Japan Standard, USSR Zone 8 */ +#if 0 + { "cast", tZONE, -HOUR(9.5) },/* Central Australian Standard */ + { "cadt", tDAYZONE, -HOUR(9.5) },/* Central Australian Daylight */ +#endif + { "east", tZONE, -HOUR(10) }, /* Eastern Australian Standard */ + { "eadt", tDAYZONE, -HOUR(10) }, /* Eastern Australian Daylight */ + { "gst", tZONE, -HOUR(10) }, /* Guam Standard, USSR Zone 9 */ + { "nzt", tZONE, -HOUR(12) }, /* New Zealand */ + { "nzst", tZONE, -HOUR(12) }, /* New Zealand Standard */ + { "nzdt", tDAYZONE, -HOUR(12) }, /* New Zealand Daylight */ + { "idle", tZONE, -HOUR(12) }, /* International Date Line East */ + { NULL } +}; + +/* Military timezone table. 
*/ +static TABLE const MilitaryTable[] = { + { "a", tZONE, HOUR( 1) }, + { "b", tZONE, HOUR( 2) }, + { "c", tZONE, HOUR( 3) }, + { "d", tZONE, HOUR( 4) }, + { "e", tZONE, HOUR( 5) }, + { "f", tZONE, HOUR( 6) }, + { "g", tZONE, HOUR( 7) }, + { "h", tZONE, HOUR( 8) }, + { "i", tZONE, HOUR( 9) }, + { "k", tZONE, HOUR( 10) }, + { "l", tZONE, HOUR( 11) }, + { "m", tZONE, HOUR( 12) }, + { "n", tZONE, HOUR(- 1) }, + { "o", tZONE, HOUR(- 2) }, + { "p", tZONE, HOUR(- 3) }, + { "q", tZONE, HOUR(- 4) }, + { "r", tZONE, HOUR(- 5) }, + { "s", tZONE, HOUR(- 6) }, + { "t", tZONE, HOUR(- 7) }, + { "u", tZONE, HOUR(- 8) }, + { "v", tZONE, HOUR(- 9) }, + { "w", tZONE, HOUR(-10) }, + { "x", tZONE, HOUR(-11) }, + { "y", tZONE, HOUR(-12) }, + { "z", tZONE, HOUR( 0) }, + { NULL } +}; + + + + +/* ARGSUSED */ +static int +yyerror(s) + char *s; +{ + return 0; +} + + +static time_t +ToSeconds(Hours, Minutes, Seconds, Meridian) + time_t Hours; + time_t Minutes; + time_t Seconds; + MERIDIAN Meridian; +{ + if (Minutes < 0 || Minutes > 59 || Seconds < 0 || Seconds > 59) + return -1; + switch (Meridian) { + case MER24: + if (Hours < 0 || Hours > 23) + return -1; + return (Hours * 60L + Minutes) * 60L + Seconds; + case MERam: + if (Hours < 1 || Hours > 12) + return -1; + return (Hours * 60L + Minutes) * 60L + Seconds; + case MERpm: + if (Hours < 1 || Hours > 12) + return -1; + return ((Hours + 12) * 60L + Minutes) * 60L + Seconds; + } + /* NOTREACHED */ +} + + +static time_t +Convert(Month, Day, Year, Hours, Minutes, Seconds, Meridian, DSTmode) + time_t Month; + time_t Day; + time_t Year; + time_t Hours; + time_t Minutes; + time_t Seconds; + MERIDIAN Meridian; + DSTMODE DSTmode; +{ + static int DaysInMonth[12] = { + 31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 + }; + time_t tod; + time_t Julian; + int i; + + if (Year < 0) + Year = -Year; + if (Year < 100) + Year += 1900; + DaysInMonth[1] = Year % 4 == 0 && (Year % 100 != 0 || Year % 400 == 0) + ? 
29 : 28; + if (Year < EPOCH || Year > 1999 + || Month < 1 || Month > 12 + /* Lint fluff: "conversion from long may lose accuracy" */ + || Day < 1 || Day > DaysInMonth[(int)--Month]) + return -1; + + for (Julian = Day - 1, i = 0; i < Month; i++) + Julian += DaysInMonth[i]; + for (i = EPOCH; i < Year; i++) + Julian += 365 + (i % 4 == 0); + Julian *= SECSPERDAY; + Julian += yyTimezone * 60L; + if ((tod = ToSeconds(Hours, Minutes, Seconds, Meridian)) < 0) + return -1; + Julian += tod; + if (DSTmode == DSTon + || (DSTmode == DSTmaybe && localtime(&Julian)->tm_isdst)) + Julian -= 60 * 60; + return Julian; +} + + +static time_t +DSTcorrect(Start, Future) + time_t Start; + time_t Future; +{ + time_t StartDay; + time_t FutureDay; + + StartDay = (localtime(&Start)->tm_hour + 1) % 24; + FutureDay = (localtime(&Future)->tm_hour + 1) % 24; + return (Future - Start) + (StartDay - FutureDay) * 60L * 60L; +} + + +static time_t +RelativeDate(Start, DayOrdinal, DayNumber) + time_t Start; + time_t DayOrdinal; + time_t DayNumber; +{ + struct tm *tm; + time_t now; + + now = Start; + tm = localtime(&now); + now += SECSPERDAY * ((DayNumber - tm->tm_wday + 7) % 7); + now += 7 * SECSPERDAY * (DayOrdinal <= 0 ? DayOrdinal : DayOrdinal - 1); + return DSTcorrect(Start, now); +} + + +static time_t +RelativeMonth(Start, RelMonth) + time_t Start; + time_t RelMonth; +{ + struct tm *tm; + time_t Month; + time_t Year; + + if (RelMonth == 0) + return 0; + tm = localtime(&Start); + Month = 12 * tm->tm_year + tm->tm_mon + RelMonth; + Year = Month / 12; + Month = Month % 12 + 1; + return DSTcorrect(Start, + Convert(Month, (time_t)tm->tm_mday, Year, + (time_t)tm->tm_hour, (time_t)tm->tm_min, (time_t)tm->tm_sec, + MER24, DSTmaybe)); +} + + +static int +LookupWord(buff) + char *buff; +{ + register char *p; + register char *q; + register const TABLE *tp; + int i; + int abbrev; + + /* Make it lowercase. 
*/ + for (p = buff; *p; p++) + if (isupper(*p)) + *p = tolower(*p); + + if (strcmp(buff, "am") == 0 || strcmp(buff, "a.m.") == 0) { + yylval.Meridian = MERam; + return tMERIDIAN; + } + if (strcmp(buff, "pm") == 0 || strcmp(buff, "p.m.") == 0) { + yylval.Meridian = MERpm; + return tMERIDIAN; + } + + /* See if we have an abbreviation for a month. */ + if (strlen(buff) == 3) + abbrev = 1; + else if (strlen(buff) == 4 && buff[3] == '.') { + abbrev = 1; + buff[3] = '\0'; + } + else + abbrev = 0; + + for (tp = MonthDayTable; tp->name; tp++) { + if (abbrev) { + if (strncmp(buff, tp->name, 3) == 0) { + yylval.Number = tp->value; + return tp->type; + } + } + else if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + } + + for (tp = TimezoneTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + + if (strcmp(buff, "dst") == 0) + return tDST; + + for (tp = UnitsTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + + /* Strip off any plural and try the units table again. */ + i = strlen(buff) - 1; + if (buff[i] == 's') { + buff[i] = '\0'; + for (tp = UnitsTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + buff[i] = 's'; /* Put back for "this" in OtherTable. */ + } + + for (tp = OtherTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + + /* Military timezones. */ + if (buff[1] == '\0' && isalpha(*buff)) { + for (tp = MilitaryTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + } + + /* Drop out any periods and try the timezone table again. 
*/ + for (i = 0, p = q = buff; *q; q++) + if (*q != '.') + *p++ = *q; + else + i++; + *p = '\0'; + if (i) + for (tp = TimezoneTable; tp->name; tp++) + if (strcmp(buff, tp->name) == 0) { + yylval.Number = tp->value; + return tp->type; + } + + return tID; +} + + +static int +yylex() +{ + register char c; + register char *p; + char buff[20]; + int Count; + int sign; + + for ( ; ; ) { + while (isspace(*yyInput)) + yyInput++; + + if (isdigit(c = *yyInput) || c == '-' || c == '+') { + if (c == '-' || c == '+') { + sign = c == '-' ? -1 : 1; + if (!isdigit(*++yyInput)) + /* skip the '-' sign */ + continue; + } + else + sign = 0; + for (yylval.Number = 0; isdigit(c = *yyInput++); ) + yylval.Number = 10 * yylval.Number + c - '0'; + yyInput--; + if (sign < 0) + yylval.Number = -yylval.Number; + return sign ? tSNUMBER : tUNUMBER; + } + if (isalpha(c)) { + for (p = buff; isalpha(c = *yyInput++) || c == '.'; ) + if (p < &buff[sizeof buff - 1]) + *p++ = c; + *p = '\0'; + yyInput--; + return LookupWord(buff); + } + if (c != '(') + return *yyInput++; + Count = 0; + do { + c = *yyInput++; + if (c == '\0') + return c; + if (c == '(') + Count++; + else if (c == ')') + Count--; + } while (Count > 0); + } +} + + +time_t +get_date(p, now) + char *p; + struct timeb *now; +{ + struct tm *tm; + struct timeb ftz; + time_t Start; + time_t tod; + + yyInput = p; + if (now == NULL) { + now = &ftz; +#if !defined(HAVE_FTIME) + (void)time(&ftz.time); + /* Set the timezone global. 
*/ + tzset(); + { +#if sgi + ftz.timezone = (int) _timezone / 60; +#else /* not sgi */ +#ifdef __386BSD__ + ftz.timezone = 0; +#else /* neither sgi nor 386BSD */ +#if defined (USG) + extern time_t timezone; + + ftz.timezone = (int) timezone / 60; +#else /* neither sgi nor 386BSD nor USG */ + struct timeval tv; + struct timezone tz; + + gettimeofday (&tv, &tz); + ftz.timezone = (int) tz.tz_minuteswest; +#endif /* neither sgi nor 386BSD nor USG */ +#endif /* neither sgi nor 386BSD */ +#endif /* not sgi */ + } +#else /* HAVE_FTIME */ + (void)ftime(&ftz); +#endif /* HAVE_FTIME */ + } + + tm = localtime(&now->time); + yyYear = tm->tm_year; + yyMonth = tm->tm_mon + 1; + yyDay = tm->tm_mday; + yyTimezone = now->timezone; + yyDSTmode = DSTmaybe; + yyHour = 0; + yyMinutes = 0; + yySeconds = 0; + yyMeridian = MER24; + yyRelSeconds = 0; + yyRelMonth = 0; + yyHaveDate = 0; + yyHaveDay = 0; + yyHaveRel = 0; + yyHaveTime = 0; + yyHaveZone = 0; + + if (yyparse() + || yyHaveTime > 1 || yyHaveZone > 1 || yyHaveDate > 1 || yyHaveDay > 1) + return -1; + + if (yyHaveDate || yyHaveTime || yyHaveDay) { + Start = Convert(yyMonth, yyDay, yyYear, yyHour, yyMinutes, yySeconds, + yyMeridian, yyDSTmode); + if (Start < 0) + return -1; + } + else { + Start = now->time; + if (!yyHaveRel) + Start -= ((tm->tm_hour * 60L + tm->tm_min) * 60L) + tm->tm_sec; + } + + Start += yyRelSeconds; + Start += RelativeMonth(Start, yyRelMonth); + + if (yyHaveDay && !yyHaveDate) { + tod = RelativeDate(Start, yyDayOrdinal, yyDayNumber); + Start += tod; + } + + /* Have to do *something* with a legitimate -1 so it's distinguishable + * from the error return value. (Alternately could set errno on error.) */ + return Start == -1 ? 
0 : Start; +} + + +#if defined(TEST) + +/* ARGSUSED */ +main(ac, av) + int ac; + char *av[]; +{ + char buff[128]; + time_t d; + + (void)printf("Enter date, or blank line to exit.\n\t> "); + (void)fflush(stdout); + while (gets(buff) && buff[0]) { + d = get_date(buff, (struct timeb *)NULL); + if (d == -1) + (void)printf("Bad format - couldn't convert.\n"); + else + (void)printf("%s", ctime(&d)); + (void)printf("\t> "); + (void)fflush(stdout); + } + exit(0); + /* NOTREACHED */ +} +#endif /* defined(TEST) */ diff --git a/gnu/usr.bin/tar/getoldopt.c b/gnu/usr.bin/tar/getoldopt.c new file mode 100644 index 0000000..27511b9 --- /dev/null +++ b/gnu/usr.bin/tar/getoldopt.c @@ -0,0 +1,96 @@ +/* Replacement for getopt() that can be used by tar. + Copyright (C) 1988 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * Plug-compatible replacement for getopt() for parsing tar-like + * arguments. If the first argument begins with "-", it uses getopt; + * otherwise, it uses the old rules used by tar, dump, and ps. + * + * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) + */ + +#include <stdio.h> +#include "getopt.h" +#include "tar.h" /* For msg() declaration if STDC_MSG. 
*/ +#include <sys/types.h> +#include "port.h" + +int +getoldopt (argc, argv, optstring, long_options, opt_index) + int argc; + char **argv; + char *optstring; + struct option *long_options; + int *opt_index; +{ + extern char *optarg; /* Points to next arg */ + extern int optind; /* Global argv index */ + static char *key; /* Points to next keyletter */ + static char use_getopt; /* !=0 if argv[1][0] was '-' */ + char c; + char *place; + + optarg = NULL; + + if (key == NULL) + { /* First time */ + if (argc < 2) + return EOF; + key = argv[1]; + if ((*key == '-') || (*key == '+')) + use_getopt++; + else + optind = 2; + } + + if (use_getopt) + return getopt_long (argc, argv, optstring, + long_options, opt_index); + + c = *key++; + if (c == '\0') + { + key--; + return EOF; + } + place = index (optstring, c); + + if (place == NULL || c == ':') + { + msg ("unknown option %c", c); + return ('?'); + } + + place++; + if (*place == ':') + { + if (optind < argc) + { + optarg = argv[optind]; + optind++; + } + else + { + msg ("%c argument missing", c); + return ('?'); + } + } + + return (c); +} diff --git a/gnu/usr.bin/tar/getopt.c b/gnu/usr.bin/tar/getopt.c new file mode 100644 index 0000000..3db9abf --- /dev/null +++ b/gnu/usr.bin/tar/getopt.c @@ -0,0 +1,712 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 1993 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* NOTE!!! AIX requires this to be the first thing in the file. + Do not put ANYTHING before it! */ +#if !defined (__GNUC__) && defined (_AIX) + #pragma alloca +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if defined (HAVE_ALLOCA_H) || (defined(sparc) && (defined(sun) || (!defined(USG) && !defined(SVR4) && !defined(__svr4__)))) +#include <alloca.h> +#else +#ifndef _AIX +char *alloca (); +#endif +#endif /* alloca.h */ +#endif /* not __GNUC__ */ + +#if !__STDC__ && !defined(const) && IN_GCC +#define const +#endif + +#include <stdio.h> + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#undef alloca +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +#include <stdlib.h> +#else /* Not GNU C library. */ +#define __alloca alloca +#endif /* GNU C library. */ + +/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a + long-named option. Because this is not POSIX.2 compliant, it is + being phased out. */ +/* #define GETOPT_COMPAT */ + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. 
+ + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = 0; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. 
+ This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include <string.h> +#define my_index strchr +#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n)) +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +char *getenv (); + +static char * +my_index (string, chr) + char *string; + int chr; +{ + while (*string) + { + if (*string == chr) + return string; + string++; + } + return 0; +} + +static void +my_bcopy (from, to, size) + char *from, *to; + int size; +{ + int i; + for (i = 0; i < size; i++) + to[i] = from[i]; +} +#endif /* GNU C library. */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. 
`first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *); + char **temp = (char **) __alloca (nonopts_size); + + /* Interchange the two blocks of data in ARGV. */ + + my_bcopy ((char *) &argv[first_nonopt], (char *) temp, nonopts_size); + my_bcopy ((char *) &argv[last_nonopt], (char *) &argv[first_nonopt], + (optind - last_nonopt) * sizeof (char *)); + my_bcopy ((char *) temp, + (char *) &argv[first_nonopt + optind - last_nonopt], + nonopts_size); + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. 
+ Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, or the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPTS of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int option_index; + + optarg = 0; + + /* Initialize the internal data when the first call is made. + Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + if (optind == 0) + { + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (getenv ("POSIXLY_CORRECT") != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + } + + if (nextchar == NULL || *nextchar == '\0') + { + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Now skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + optind++; + last_nonopt = optind; + } + + /* Special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. 
*/ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Start decoding its characters. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + if (longopts != NULL + && ((argv[optind][0] == '-' + && (argv[optind][1] == '-' || long_only)) +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + )) + { + const struct option *p; + char *s = nextchar; + int exact = 0; + int ambig = 0; + const struct option *pfound = NULL; + int indfound; + + while (*s && *s != '=') + s++; + + /* Test all options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; + p++, option_index++) + if (!strncmp (p->name, nextchar, s - nextchar)) + { + if (s - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. 
*/ + pfound = p; + indfound = option_index; + } + else + /* Second nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, "%s: option `%s' is ambiguous\n", + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*s) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = s + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + "%s: option `--%s' doesn't allow an argument\n", + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + "%s: option `%c%s' doesn't allow an argument\n", + argv[0], argv[optind - 1][0], pfound->name); + } + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, "%s: option `%s' requires an argument\n", + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. 
*/ + if (!long_only || argv[optind][1] == '-' +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, "%s: unrecognized option `--%s'\n", + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, "%s: unrecognized option `%c%s'\n", + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { +#if 0 + if (c < 040 || c >= 0177) + fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + argv[0], c); + else + fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); +#else + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); +#endif + } + optopt = c; + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = 0; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { +#if 0 + fprintf (stderr, "%s: option `-%c' requires an argument\n", + argv[0], c); +#else + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, "%s: option requires an argument -- %c\n", + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/tar/getopt.h b/gnu/usr.bin/tar/getopt.h new file mode 100644 index 0000000..93a5cf7 --- /dev/null +++ b/gnu/usr.bin/tar/getopt.h @@ -0,0 +1,125 @@ +/* Declarations for getopt. + Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. 
+ + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +#if __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if __STDC__ +#if defined(__GNU_LIBRARY__) +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* not __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. 
*/ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* not __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/gnu/usr.bin/tar/getopt1.c b/gnu/usr.bin/tar/getopt1.c new file mode 100644 index 0000000..c3582cf --- /dev/null +++ b/gnu/usr.bin/tar/getopt1.c @@ -0,0 +1,161 @@ +/* Getopt for GNU. + Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "getopt.h" + +#if !__STDC__ && !defined(const) && IN_GCC +#define const +#endif + +#include <stdio.h> + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. 
*/ +#ifdef __GNU_LIBRARY__ +#include <stdlib.h> +#else +char *getenv (); +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + +#ifdef TEST + +#include <stdio.h> + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == EOF) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/tar/getpagesize.h b/gnu/usr.bin/tar/getpagesize.h new file mode 100644 index 0000000..2d43f26 --- /dev/null +++ b/gnu/usr.bin/tar/getpagesize.h @@ -0,0 +1,38 @@ +#ifdef BSD +#ifndef BSD4_1 +#define HAVE_GETPAGESIZE +#endif +#endif + +#ifndef HAVE_GETPAGESIZE + +#ifdef VMS +#define getpagesize() 512 +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef _SC_PAGESIZE +#define getpagesize() sysconf(_SC_PAGESIZE) +#else + +#include <sys/param.h> + +#ifdef EXEC_PAGESIZE +#define getpagesize() EXEC_PAGESIZE +#else +#ifdef NBPG +#define getpagesize() NBPG * CLSIZE +#ifndef CLSIZE +#define CLSIZE 1 +#endif /* no CLSIZE */ +#else /* no NBPG */ +#define getpagesize() NBPC +#endif /* no NBPG */ +#endif /* no EXEC_PAGESIZE */ +#endif /* no _SC_PAGESIZE */ + +#endif /* not HAVE_GETPAGESIZE */ + diff --git a/gnu/usr.bin/tar/gnu.c b/gnu/usr.bin/tar/gnu.c new file mode 100644 index 0000000..ef51f2b --- /dev/null +++ b/gnu/usr.bin/tar/gnu.c @@ -0,0 +1,677 @@ +/* GNU dump extensions to tar. + Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. 
If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <stdio.h> +#include <sys/types.h> +#include <ctype.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif +#include <time.h> +time_t time (); + +#include "tar.h" +#include "port.h" + +#ifndef S_ISLNK +#define lstat stat +#endif + +extern time_t new_time; +extern FILE *msg_file; + +void addname (); +int check_exclude (); +extern PTR ck_malloc (); +extern PTR ck_realloc (); +int confirm (); +extern PTR init_buffer (); +extern char *get_buffer (); +int is_dot_or_dotdot (); +extern void add_buffer (); +extern void flush_buffer (); +void name_gather (); +int recursively_delete (); +void skip_file (); +char *un_quote_string (); + +extern char *new_name (); + +static void add_dir_name (); + +struct dirname + { + struct dirname *next; + char *name; + char *dir_text; + int dev; + int ino; + int allnew; + }; +static struct dirname *dir_list; +static time_t this_time; + +void +add_dir (name, dev, ino, text) + char *name; + char *text; + dev_t dev; + ino_t ino; +{ + struct dirname *dp; + + dp = (struct dirname *) ck_malloc (sizeof (struct dirname)); + if (!dp) + abort (); + dp->next = dir_list; + dir_list = dp; + dp->dev = dev; + dp->ino = ino; + dp->name = ck_malloc (strlen (name) + 1); + strcpy (dp->name, name); + dp->dir_text = text; + dp->allnew = 0; +} + +void +read_dir_file () +{ + int dev; + int ino; + char *strp; + FILE *fp; + char buf[512]; + static char *path = 0; + + if (path == 0) + path = ck_malloc (PATH_MAX); + time (&this_time); + if (gnu_dumpfile[0] != '/') + { +#if defined(__MSDOS__) || defined(HAVE_GETCWD) || defined(_POSIX_VERSION) + if (!getcwd (path, PATH_MAX)) + { + msg ("Couldn't get current directory."); + exit (EX_SYSTEM); + } +#else + char *getwd (); + + if (!getwd (path)) + { + msg ("Couldn't get current directory: %s", path); + exit (EX_SYSTEM); + } +#endif + /* If this doesn't fit, we're in serious trouble */ + strcat (path, "/"); 
+ strcat (path, gnu_dumpfile); + gnu_dumpfile = path; + } + fp = fopen (gnu_dumpfile, "r"); + if (fp == 0 && errno != ENOENT) + { + msg_perror ("Can't open %s", gnu_dumpfile); + return; + } + if (!fp) + return; + fgets (buf, sizeof (buf), fp); + if (!f_new_files) + { + f_new_files++; + new_time = atol (buf); + } + while (fgets (buf, sizeof (buf), fp)) + { + strp = &buf[strlen (buf)]; + if (strp[-1] == '\n') + strp[-1] = '\0'; + strp = buf; + dev = atol (strp); + while (isdigit (*strp)) + strp++; + ino = atol (strp); + while (isspace (*strp)) + strp++; + while (isdigit (*strp)) + strp++; + strp++; + add_dir (un_quote_string (strp), dev, ino, (char *) 0); + } + fclose (fp); +} + +void +write_dir_file () +{ + FILE *fp; + struct dirname *dp; + char *str; + extern char *quote_copy_string (); + + fp = fopen (gnu_dumpfile, "w"); + if (fp == 0) + { + msg_perror ("Can't write to %s", gnu_dumpfile); + return; + } + fprintf (fp, "%lu\n", this_time); + for (dp = dir_list; dp; dp = dp->next) + { + if (!dp->dir_text) + continue; + str = quote_copy_string (dp->name); + if (str) + { + fprintf (fp, "%u %u %s\n", dp->dev, dp->ino, str); + free (str); + } + else + fprintf (fp, "%u %u %s\n", dp->dev, dp->ino, dp->name); + } + fclose (fp); +} + +struct dirname * +get_dir (name) + char *name; +{ + struct dirname *dp; + + for (dp = dir_list; dp; dp = dp->next) + { + if (!strcmp (dp->name, name)) + return dp; + } + return 0; +} + + +/* Collect all the names from argv[] (or whatever), then expand them into + a directory tree, and put all the directories at the beginning. 
*/ +void +collect_and_sort_names () +{ + struct name *n, *n_next; + int num_names; + struct stat statbuf; + int name_cmp (); + char *merge_sort (); + + name_gather (); + + if (gnu_dumpfile) + read_dir_file (); + if (!namelist) + addname ("."); + for (n = namelist; n; n = n_next) + { + n_next = n->next; + if (n->found || n->dir_contents) + continue; + if (n->regexp) /* FIXME just skip regexps for now */ + continue; + if (n->change_dir) + if (chdir (n->change_dir) < 0) + { + msg_perror ("can't chdir to %s", n->change_dir); + continue; + } + +#ifdef AIX + if (statx (n->name, &statbuf, STATSIZE, STX_HIDDEN | STX_LINK)) +#else + if (lstat (n->name, &statbuf) < 0) +#endif /* AIX */ + { + msg_perror ("can't stat %s", n->name); + continue; + } + if (S_ISDIR (statbuf.st_mode)) + { + n->found++; + add_dir_name (n->name, statbuf.st_dev); + } + } + + num_names = 0; + for (n = namelist; n; n = n->next) + num_names++; + namelist = (struct name *) merge_sort ((PTR) namelist, num_names, (char *) (&(namelist->next)) - (char *) namelist, name_cmp); + + for (n = namelist; n; n = n->next) + { + n->found = 0; + } + if (gnu_dumpfile) + write_dir_file (); +} + +int +name_cmp (n1, n2) + struct name *n1, *n2; +{ + if (n1->found) + { + if (n2->found) + return strcmp (n1->name, n2->name); + else + return -1; + } + else if (n2->found) + return 1; + else + return strcmp (n1->name, n2->name); +} + +int +dirent_cmp (p1, p2) + const PTR p1; + const PTR p2; +{ + char *frst, *scnd; + + frst = (*(char **) p1) + 1; + scnd = (*(char **) p2) + 1; + + return strcmp (frst, scnd); +} + +char * +get_dir_contents (p, device) + char *p; + int device; +{ + DIR *dirp; + register struct dirent *d; + char *new_buf; + char *namebuf; + int bufsiz; + int len; + PTR the_buffer; + char *buf; + size_t n_strs; + /* int n_size;*/ + char *p_buf; + char **vec, **p_vec; + + extern int errno; + + errno = 0; + dirp = opendir (p); + bufsiz = strlen (p) + NAMSIZ; + namebuf = ck_malloc (bufsiz + 2); + if (!dirp) + { + if 
(errno) + msg_perror ("can't open directory %s", p); + else + msg ("error opening directory %s", p); + new_buf = NULL; + } + else + { + struct dirname *dp; + int all_children; + + dp = get_dir (p); + all_children = dp ? dp->allnew : 0; + (void) strcpy (namebuf, p); + if (p[strlen (p) - 1] != '/') + (void) strcat (namebuf, "/"); + len = strlen (namebuf); + + the_buffer = init_buffer (); + while (d = readdir (dirp)) + { + struct stat hs; + + /* Skip . and .. */ + if (is_dot_or_dotdot (d->d_name)) + continue; + if (NLENGTH (d) + len >= bufsiz) + { + bufsiz += NAMSIZ; + namebuf = ck_realloc (namebuf, bufsiz + 2); + } + (void) strcpy (namebuf + len, d->d_name); +#ifdef AIX + if (0 != f_follow_links ? + statx (namebuf, &hs, STATSIZE, STX_HIDDEN) : + statx (namebuf, &hs, STATSIZE, STX_HIDDEN | STX_LINK)) +#else + if (0 != f_follow_links ? stat (namebuf, &hs) : lstat (namebuf, &hs)) +#endif + { + msg_perror ("can't stat %s", namebuf); + continue; + } + if ((f_local_filesys && device != hs.st_dev) + || (f_exclude && check_exclude (namebuf))) + add_buffer (the_buffer, "N", 1); +#ifdef AIX + else if (S_ISHIDDEN (hs.st_mode)) + { + add_buffer (the_buffer, "D", 1); + strcat (d->d_name, "A"); + d->d_namlen++; + } +#endif /* AIX */ + else if (S_ISDIR (hs.st_mode)) + { + if (dp = get_dir (namebuf)) + { + if (dp->dev != hs.st_dev + || dp->ino != hs.st_ino) + { + if (f_verbose) + msg ("directory %s has been renamed.", namebuf); + dp->allnew = 1; + dp->dev = hs.st_dev; + dp->ino = hs.st_ino; + } + dp->dir_text = ""; + } + else + { + if (f_verbose) + msg ("Directory %s is new", namebuf); + add_dir (namebuf, hs.st_dev, hs.st_ino, ""); + dp = get_dir (namebuf); + dp->allnew = 1; + } + if (all_children) + dp->allnew = 1; + + add_buffer (the_buffer, "D", 1); + } + else if (!all_children + && f_new_files + && new_time > hs.st_mtime + && (f_new_files > 1 + || new_time > hs.st_ctime)) + add_buffer (the_buffer, "N", 1); + else + add_buffer (the_buffer, "Y", 1); + add_buffer (the_buffer, 
d->d_name, (int) (NLENGTH (d) + 1)); + } + add_buffer (the_buffer, "\000\000", 2); + closedir (dirp); + + /* Well, we've read in the contents of the dir, now sort them */ + buf = get_buffer (the_buffer); + if (buf[0] == '\0') + { + flush_buffer (the_buffer); + new_buf = NULL; + } + else + { + n_strs = 0; + for (p_buf = buf; *p_buf;) + { + int tmp; + + tmp = strlen (p_buf) + 1; + n_strs++; + p_buf += tmp; + } + vec = (char **) ck_malloc (sizeof (char *) * (n_strs + 1)); + for (p_vec = vec, p_buf = buf; *p_buf; p_buf += strlen (p_buf) + 1) + *p_vec++ = p_buf; + *p_vec = 0; + qsort ((PTR) vec, n_strs, sizeof (char *), dirent_cmp); + new_buf = (char *) ck_malloc (p_buf - buf + 2); + for (p_vec = vec, p_buf = new_buf; *p_vec; p_vec++) + { + char *p_tmp; + + for (p_tmp = *p_vec; *p_buf++ = *p_tmp++;) + ; + } + *p_buf++ = '\0'; + free (vec); + flush_buffer (the_buffer); + } + } + free (namebuf); + return new_buf; +} + +/* p is a directory. Add all the files in P to the namelist. If any of the + files is a directory, recurse on the subdirectory. . . */ +static void +add_dir_name (p, device) + char *p; + int device; +{ + char *new_buf; + char *p_buf; + + char *namebuf; + int buflen; + register int len; + int sublen; + + /* PTR the_buffer;*/ + + /* char *buf;*/ + /* char **vec,**p_vec;*/ + /* int n_strs,n_size;*/ + + struct name *n; + + int dirent_cmp (); + + new_buf = get_dir_contents (p, device); + + for (n = namelist; n; n = n->next) + { + if (!strcmp (n->name, p)) + { + n->dir_contents = new_buf ? new_buf : "\0\0\0\0"; + break; + } + } + + if (new_buf) + { + len = strlen (p); + buflen = NAMSIZ <= len ? 
len + NAMSIZ : NAMSIZ; + namebuf = ck_malloc (buflen + 1); + + (void) strcpy (namebuf, p); + if (namebuf[len - 1] != '/') + { + namebuf[len++] = '/'; + namebuf[len] = '\0'; + } + for (p_buf = new_buf; *p_buf; p_buf += sublen + 1) + { + sublen = strlen (p_buf); + if (*p_buf == 'D') + { + if (len + sublen >= buflen) + { + buflen += NAMSIZ; + namebuf = ck_realloc (namebuf, buflen + 1); + } + (void) strcpy (namebuf + len, p_buf + 1); + addname (namebuf); + add_dir_name (namebuf, device); + } + } + free (namebuf); + } +} + +/* Returns non-zero if p is . or .. This could be a macro for speed. */ +int +is_dot_or_dotdot (p) + char *p; +{ + return (p[0] == '.' && (p[1] == '\0' || (p[1] == '.' && p[2] == '\0'))); +} + + + + + + +void +gnu_restore (skipcrud) + int skipcrud; +{ + char *current_dir; + /* int current_dir_length; */ + + char *archive_dir; + /* int archive_dir_length; */ + PTR the_buffer; + char *p; + DIR *dirp; + struct dirent *d; + char *cur, *arc; + extern struct stat hstat; /* Stat struct corresponding */ + long size, copied; + char *from, *to; + extern union record *head; + + dirp = opendir (skipcrud + current_file_name); + + if (!dirp) + { + /* The directory doesn't exist now. It'll be created. 
+ In any case, we don't have to delete any files out + of it */ + skip_file ((long) hstat.st_size); + return; + } + + the_buffer = init_buffer (); + while (d = readdir (dirp)) + { + if (is_dot_or_dotdot (d->d_name)) + continue; + + add_buffer (the_buffer, d->d_name, (int) (NLENGTH (d) + 1)); + } + closedir (dirp); + add_buffer (the_buffer, "", 1); + + current_dir = get_buffer (the_buffer); + archive_dir = (char *) ck_malloc (hstat.st_size); + if (archive_dir == 0) + { + msg ("Can't allocate %d bytes for restore", hstat.st_size); + skip_file ((long) hstat.st_size); + return; + } + to = archive_dir; + for (size = hstat.st_size; size > 0; size -= copied) + { + from = findrec ()->charptr; + if (!from) + { + msg ("Unexpected EOF in archive\n"); + break; + } + copied = endofrecs ()->charptr - from; + if (copied > size) + copied = size; + bcopy ((PTR) from, (PTR) to, (int) copied); + to += copied; + userec ((union record *) (from + copied - 1)); + } + + for (cur = current_dir; *cur; cur += strlen (cur) + 1) + { + for (arc = archive_dir; *arc; arc += strlen (arc) + 1) + { + arc++; + if (!strcmp (arc, cur)) + break; + } + if (*arc == '\0') + { + p = new_name (skipcrud + current_file_name, cur); + if (f_confirm && !confirm ("delete", p)) + { + free (p); + continue; + } + if (f_verbose) + fprintf (msg_file, "%s: deleting %s\n", tar, p); + if (recursively_delete (p)) + { + msg ("%s: Error while deleting %s\n", tar, p); + } + free (p); + } + + } + flush_buffer (the_buffer); + free (archive_dir); +} + +int +recursively_delete (path) + char *path; +{ + struct stat sbuf; + DIR *dirp; + struct dirent *dp; + char *path_buf; + /* int path_len; */ + + + if (lstat (path, &sbuf) < 0) + return 1; + if (S_ISDIR (sbuf.st_mode)) + { + + /* path_len=strlen(path); */ + dirp = opendir (path); + if (dirp == 0) + return 1; + while (dp = readdir (dirp)) + { + if (is_dot_or_dotdot (dp->d_name)) + continue; + path_buf = new_name (path, dp->d_name); + if (recursively_delete (path_buf)) + { + free 
(path_buf); + closedir (dirp); + return 1; + } + free (path_buf); + } + closedir (dirp); + + if (rmdir (path) < 0) + return 1; + return 0; + } + if (unlink (path) < 0) + return 1; + return 0; +} diff --git a/gnu/usr.bin/tar/list.c b/gnu/usr.bin/tar/list.c new file mode 100644 index 0000000..a0c65a3 --- /dev/null +++ b/gnu/usr.bin/tar/list.c @@ -0,0 +1,881 @@ +/* List a tar archive. + Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * List a tar archive. + * + * Also includes support routines for reading a tar archive. + * + * this version written 26 Aug 1985 by John Gilmore (ihnp4!hoptoad!gnu). 
+ */ + +#include <stdio.h> +#include <ctype.h> +#include <sys/types.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif +#include <time.h> + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) + +#include "tar.h" +#include "port.h" + +extern FILE *msg_file; + +long from_oct (); /* Decode octal number */ +void demode (); /* Print file mode */ +void restore_saved_dir_info (); +PTR ck_malloc (); + +union record *head; /* Points to current archive header */ +struct stat hstat; /* Stat struct corresponding */ +int head_standard; /* Tape header is in ANSI format */ + +int check_exclude (); +void close_archive (); +void decode_header (); +int findgid (); +int finduid (); +void name_gather (); +int name_match (); +void names_notfound (); +void open_archive (); +void print_header (); +int read_header (); +void saverec (); +void skip_file (); +void skip_extended_headers (); + +extern char *quote_copy_string (); + + +/* + * Main loop for reading an archive. + */ +void +read_and (do_something) + void (*do_something) (); +{ + int status = 3; /* Initial status at start of archive */ + int prev_status; + extern time_t new_time; + char save_linkflag; + + name_gather (); /* Gather all the names */ + open_archive (1); /* Open for reading */ + + for (;;) + { + prev_status = status; + status = read_header (); + switch (status) + { + + case 1: /* Valid header */ + /* We should decode next field (mode) first... */ + /* Ensure incoming names are null terminated. 
*/ + + if (!name_match (current_file_name) + || (f_new_files && hstat.st_mtime < new_time) + || (f_exclude && check_exclude (current_file_name))) + { + + int isextended = 0; + + if (head->header.linkflag == LF_VOLHDR + || head->header.linkflag == LF_MULTIVOL + || head->header.linkflag == LF_NAMES) + { + (*do_something) (); + continue; + } + if (f_show_omitted_dirs + && head->header.linkflag == LF_DIR) + msg ("Omitting %s\n", current_file_name); + /* Skip past it in the archive */ + if (head->header.isextended) + isextended = 1; + save_linkflag = head->header.linkflag; + userec (head); + if (isextended) + { + /* register union record *exhdr; + + for (;;) { + exhdr = findrec(); + if (!exhdr->ext_hdr.isextended) { + userec(exhdr); + break; + } + } + userec(exhdr);*/ + skip_extended_headers (); + } + /* Skip to the next header on the archive */ + if (save_linkflag != LF_DIR) + skip_file ((long) hstat.st_size); + continue; + + } + + (*do_something) (); + continue; + + /* + * If the previous header was good, tell them + * that we are skipping bad ones. + */ + case 0: /* Invalid header */ + userec (head); + switch (prev_status) + { + case 3: /* Error on first record */ + msg ("Hmm, this doesn't look like a tar archive."); + /* FALL THRU */ + case 2: /* Error after record of zeroes */ + case 1: /* Error after header rec */ + msg ("Skipping to next file header..."); + case 0: /* Error after error */ + break; + } + continue; + + case 2: /* Record of zeroes */ + userec (head); + status = prev_status; /* If error after 0's */ + if (f_ignorez) + continue; + /* FALL THRU */ + case EOF: /* End of archive */ + break; + } + break; + }; + + restore_saved_dir_info (); + close_archive (); + names_notfound (); /* Print names not found */ +} + + +/* + * Print a header record, based on tar options. 
+ */ +void +list_archive () +{ + extern char *save_name; + int isextended = 0; /* Flag to remember if head is extended */ + + /* Save the record */ + saverec (&head); + + /* Print the header record */ + if (f_verbose) + { + if (f_verbose > 1) + decode_header (head, &hstat, &head_standard, 0); + print_header (); + } + + if (f_gnudump && head->header.linkflag == LF_DUMPDIR) + { + size_t size, written, check; + char *data; + extern long save_totsize; + extern long save_sizeleft; + + userec (head); + if (f_multivol) + { + save_name = current_file_name; + save_totsize = hstat.st_size; + } + for (size = hstat.st_size; size > 0; size -= written) + { + if (f_multivol) + save_sizeleft = size; + data = findrec ()->charptr; + if (data == NULL) + { + msg ("EOF in archive file?"); + break; + } + written = endofrecs ()->charptr - data; + if (written > size) + written = size; + errno = 0; + check = fwrite (data, sizeof (char), written, msg_file); + userec ((union record *) (data + written - 1)); + if (check != written) + { + msg_perror ("only wrote %ld of %ld bytes to file %s", check, written, current_file_name); + skip_file ((long) (size) - written); + break; + } + } + if (f_multivol) + save_name = 0; + saverec ((union record **) 0); /* Unsave it */ + fputc ('\n', msg_file); + fflush (msg_file); + return; + + } + saverec ((union record **) 0);/* Unsave it */ + /* Check to see if we have an extended header to skip over also */ + if (head->header.isextended) + isextended = 1; + + /* Skip past the header in the archive */ + userec (head); + + /* + * If we needed to skip any extended headers, do so now, by + * reading extended headers and skipping past them in the + * archive. 
+ */ + if (isextended) + { + /* register union record *exhdr; + + for (;;) { + exhdr = findrec(); + + if (!exhdr->ext_hdr.isextended) { + userec(exhdr); + break; + } + userec(exhdr); + }*/ + skip_extended_headers (); + } + + if (f_multivol) + save_name = current_file_name; + /* Skip to the next header on the archive */ + + skip_file ((long) hstat.st_size); + + if (f_multivol) + save_name = 0; +} + + +/* + * Read a record that's supposed to be a header record. + * Return its address in "head", and if it is good, the file's + * size in hstat.st_size. + * + * Return 1 for success, 0 if the checksum is bad, EOF on eof, + * 2 for a record full of zeros (EOF marker). + * + * You must always userec(head) to skip past the header which this + * routine reads. + */ +int +read_header () +{ + register int i; + register long sum, signed_sum, recsum; + register char *p; + register union record *header; + long from_oct (); + char **longp; + char *bp, *data; + int size, written; + static char *next_long_name, *next_long_link; + char *name; + +recurse: + + header = findrec (); + head = header; /* This is our current header */ + if (NULL == header) + return EOF; + + recsum = from_oct (8, header->header.chksum); + + sum = 0; + p = header->charptr; + for (i = sizeof (*header); --i >= 0;) + { + /* + * We can't use unsigned char here because of old compilers, + * e.g. V7. + */ + signed_sum += *p; + sum += 0xFF & *p++; + } + + /* Adjust checksum to count the "chksum" field as blanks. */ + for (i = sizeof (header->header.chksum); --i >= 0;) + { + sum -= 0xFF & header->header.chksum[i]; + signed_sum -= (char) header->header.chksum[i]; + } + sum += ' ' * sizeof header->header.chksum; + signed_sum += ' ' * sizeof header->header.chksum; + + if (sum == 8 * ' ') + { + /* + * This is a zeroed record...whole record is 0's except + * for the 8 blanks we faked for the checksum field. + */ + return 2; + } + + if (sum != recsum && signed_sum != recsum) + return 0; + + /* + * Good record. 
Decode file size and return. + */ + if (header->header.linkflag == LF_LINK) + hstat.st_size = 0; /* Links 0 size on tape */ + else + hstat.st_size = from_oct (1 + 12, header->header.size); + + header->header.arch_name[NAMSIZ - 1] = '\0'; + if (header->header.linkflag == LF_LONGNAME + || header->header.linkflag == LF_LONGLINK) + { + longp = ((header->header.linkflag == LF_LONGNAME) + ? &next_long_name + : &next_long_link); + + userec (header); + if (*longp) + free (*longp); + bp = *longp = (char *) ck_malloc (hstat.st_size); + + for (size = hstat.st_size; + size > 0; + size -= written) + { + data = findrec ()->charptr; + if (data == NULL) + { + msg ("Unexpected EOF on archive file"); + break; + } + written = endofrecs ()->charptr - data; + if (written > size) + written = size; + + bcopy (data, bp, written); + bp += written; + userec ((union record *) (data + written - 1)); + } + goto recurse; + } + else + { + name = (next_long_name + ? next_long_name + : head->header.arch_name); + if (current_file_name) + free (current_file_name); + current_file_name = ck_malloc (strlen (name) + 1); + strcpy (current_file_name, name); + + name = (next_long_link + ? next_long_link + : head->header.arch_linkname); + if (current_link_name) + free (current_link_name); + current_link_name = ck_malloc (strlen (name) + 1); + strcpy (current_link_name, name); + + next_long_link = next_long_name = 0; + return 1; + } +} + + +/* + * Decode things from a file header record into a "struct stat". + * Also set "*stdp" to !=0 or ==0 depending whether header record is "Unix + * Standard" tar format or regular old tar format. + * + * read_header() has already decoded the checksum and length, so we don't. + * + * If wantug != 0, we want the uid/group info decoded from Unix Standard + * tapes (for extraction). If == 0, we are just printing anyway, so save time. 
+ * + * decode_header should NOT be called twice for the same record, since the + * two calls might use different "wantug" values and thus might end up with + * different uid/gid for the two calls. If anybody wants the uid/gid they + * should decode it first, and other callers should decode it without uid/gid + * before calling a routine, e.g. print_header, that assumes decoded data. + */ +void +decode_header (header, st, stdp, wantug) + register union record *header; + register struct stat *st; + int *stdp; + int wantug; +{ + long from_oct (); + + st->st_mode = from_oct (8, header->header.mode); + st->st_mode &= 07777; + st->st_mtime = from_oct (1 + 12, header->header.mtime); + if (f_gnudump) + { + st->st_atime = from_oct (1 + 12, header->header.atime); + st->st_ctime = from_oct (1 + 12, header->header.ctime); + } + + if (0 == strcmp (header->header.magic, TMAGIC)) + { + /* Unix Standard tar archive */ + *stdp = 1; + if (wantug) + { +#ifdef NONAMES + st->st_uid = from_oct (8, header->header.uid); + st->st_gid = from_oct (8, header->header.gid); +#else + st->st_uid = + (*header->header.uname + ? finduid (header->header.uname) + : from_oct (8, header->header.uid)); + st->st_gid = + (*header->header.gname + ? findgid (header->header.gname) + : from_oct (8, header->header.gid)); +#endif + } +#if defined(S_IFBLK) || defined(S_IFCHR) + switch (header->header.linkflag) + { + case LF_BLK: + case LF_CHR: + st->st_rdev = makedev (from_oct (8, header->header.devmajor), + from_oct (8, header->header.devminor)); + } +#endif + } + else + { + /* Old fashioned tar archive */ + *stdp = 0; + st->st_uid = from_oct (8, header->header.uid); + st->st_gid = from_oct (8, header->header.gid); + st->st_rdev = 0; + } +} + + +/* + * Quick and dirty octal conversion. + * + * Result is -1 if the field is invalid (all blank, or nonoctal). 
+ */ +long +from_oct (digs, where) + register int digs; + register char *where; +{ + register long value; + + while (isspace (*where)) + { /* Skip spaces */ + where++; + if (--digs <= 0) + return -1; /* All blank field */ + } + value = 0; + while (digs > 0 && isodigit (*where)) + { /* Scan til nonoctal */ + value = (value << 3) | (*where++ - '0'); + --digs; + } + + if (digs > 0 && *where && !isspace (*where)) + return -1; /* Ended on non-space/nul */ + + return value; +} + + +/* + * Actually print it. + * + * Plain and fancy file header block logging. + * Non-verbose just prints the name, e.g. for "tar t" or "tar x". + * This should just contain file names, so it can be fed back into tar + * with xargs or the "-T" option. The verbose option can give a bunch + * of info, one line per file. I doubt anybody tries to parse its + * format, or if they do, they shouldn't. Unix tar is pretty random here + * anyway. + * + * Note that print_header uses the globals <head>, <hstat>, and + * <head_standard>, which must be set up in advance. This is not very clean + * and should be cleaned up. FIXME. + */ +#define UGSWIDTH 18 /* min width of User, group, size */ +/* UGSWIDTH of 18 means that with user and group names <= 8 chars the columns + never shift during the listing. */ +#define DATEWIDTH 19 /* Last mod date */ +static int ugswidth = UGSWIDTH; /* Max width encountered so far */ + +void +print_header () +{ + char modes[11]; + char *timestamp; + char uform[11], gform[11]; /* These hold formatted ints */ + char *user, *group; + char size[24]; /* Holds a formatted long or maj, min */ + time_t longie; /* To make ctime() call portable */ + int pad; + char *name; + extern long baserec; + + if (f_sayblock) + fprintf (msg_file, "rec %10d: ", baserec + (ar_record - ar_block)); + /* annofile(msg_file, (char *)NULL); */ + + if (f_verbose <= 1) + { + /* Just the fax, mam. 
*/ + char *name; + + name = quote_copy_string (current_file_name); + if (name == 0) + name = current_file_name; + fprintf (msg_file, "%s\n", name); + if (name != current_file_name) + free (name); + } + else + { + /* File type and modes */ + modes[0] = '?'; + switch (head->header.linkflag) + { + case LF_VOLHDR: + modes[0] = 'V'; + break; + + case LF_MULTIVOL: + modes[0] = 'M'; + break; + + case LF_NAMES: + modes[0] = 'N'; + break; + + case LF_LONGNAME: + case LF_LONGLINK: + msg ("Visible longname error\n"); + break; + + case LF_SPARSE: + case LF_NORMAL: + case LF_OLDNORMAL: + case LF_LINK: + modes[0] = '-'; + if ('/' == current_file_name[strlen (current_file_name) - 1]) + modes[0] = 'd'; + break; + case LF_DUMPDIR: + modes[0] = 'd'; + break; + case LF_DIR: + modes[0] = 'd'; + break; + case LF_SYMLINK: + modes[0] = 'l'; + break; + case LF_BLK: + modes[0] = 'b'; + break; + case LF_CHR: + modes[0] = 'c'; + break; + case LF_FIFO: + modes[0] = 'p'; + break; + case LF_CONTIG: + modes[0] = 'C'; + break; + } + + demode ((unsigned) hstat.st_mode, modes + 1); + + /* Timestamp */ + longie = hstat.st_mtime; + timestamp = ctime (&longie); + timestamp[16] = '\0'; + timestamp[24] = '\0'; + + /* User and group names */ + if (*head->header.uname && head_standard) + { + user = head->header.uname; + } + else + { + user = uform; + (void) sprintf (uform, "%d", + from_oct (8, head->header.uid)); + } + if (*head->header.gname && head_standard) + { + group = head->header.gname; + } + else + { + group = gform; + (void) sprintf (gform, "%d", + from_oct (8, head->header.gid)); + } + + /* Format the file size or major/minor device numbers */ + switch (head->header.linkflag) + { +#if defined(S_IFBLK) || defined(S_IFCHR) + case LF_CHR: + case LF_BLK: + (void) sprintf (size, "%d,%d", + major (hstat.st_rdev), + minor (hstat.st_rdev)); + break; +#endif + case LF_SPARSE: + (void) sprintf (size, "%ld", + from_oct (1 + 12, head->header.realsize)); + break; + default: + (void) sprintf (size, "%ld", 
(long) hstat.st_size); + } + + /* Figure out padding and print the whole line. */ + pad = strlen (user) + strlen (group) + strlen (size) + 1; + if (pad > ugswidth) + ugswidth = pad; + + name = quote_copy_string (current_file_name); + if (!name) + name = current_file_name; + fprintf (msg_file, "%s %s/%s %*s%s %s %s %s", + modes, + user, + group, + ugswidth - pad, + "", + size, + timestamp + 4, timestamp + 20, + name); + + if (name != current_file_name) + free (name); + switch (head->header.linkflag) + { + case LF_SYMLINK: + name = quote_copy_string (current_link_name); + if (!name) + name = current_link_name; + fprintf (msg_file, " -> %s\n", name); + if (name != current_link_name) + free (name); + break; + + case LF_LINK: + name = quote_copy_string (current_link_name); + if (!name) + name = current_link_name; + fprintf (msg_file, " link to %s\n", current_link_name); + if (name != current_link_name) + free (name); + break; + + default: + fprintf (msg_file, " unknown file type '%c'\n", + head->header.linkflag); + break; + + case LF_OLDNORMAL: + case LF_NORMAL: + case LF_SPARSE: + case LF_CHR: + case LF_BLK: + case LF_DIR: + case LF_FIFO: + case LF_CONTIG: + case LF_DUMPDIR: + putc ('\n', msg_file); + break; + + case LF_VOLHDR: + fprintf (msg_file, "--Volume Header--\n"); + break; + + case LF_MULTIVOL: + fprintf (msg_file, "--Continued at byte %ld--\n", from_oct (1 + 12, head->header.offset)); + break; + + case LF_NAMES: + fprintf (msg_file, "--Mangled file names--\n"); + break; + } + } + fflush (msg_file); +} + +/* + * Print a similar line when we make a directory automatically. 
+ */ +void +pr_mkdir (pathname, length, mode) + char *pathname; + int length; + int mode; +{ + char modes[11]; + char *name; + extern long baserec; + + if (f_verbose > 1) + { + /* File type and modes */ + modes[0] = 'd'; + demode ((unsigned) mode, modes + 1); + + if (f_sayblock) + fprintf (msg_file, "rec %10d: ", baserec + (ar_record - ar_block)); + /* annofile(msg_file, (char *)NULL); */ + name = quote_copy_string (pathname); + if (!name) + name = pathname; + fprintf (msg_file, "%s %*s %.*s\n", + modes, + ugswidth + DATEWIDTH, + "Creating directory:", + length, + pathname); + if (name != pathname) + free (name); + } +} + + +/* + * Skip over <size> bytes of data in records in the archive. + */ +void +skip_file (size) + register long size; +{ + union record *x; + extern long save_totsize; + extern long save_sizeleft; + + if (f_multivol) + { + save_totsize = size; + save_sizeleft = size; + } + + while (size > 0) + { + x = findrec (); + if (x == NULL) + { /* Check it... */ + msg ("Unexpected EOF on archive file"); + exit (EX_BADARCH); + } + userec (x); + size -= RECORDSIZE; + if (f_multivol) + save_sizeleft -= RECORDSIZE; + } +} + +void +skip_extended_headers () +{ + register union record *exhdr; + + for (;;) + { + exhdr = findrec (); + if (!exhdr->ext_hdr.isextended) + { + userec (exhdr); + break; + } + userec (exhdr); + } +} + +/* + * Decode the mode string from a stat entry into a 9-char string and a null. 
+ */ +void +demode (mode, string) + register unsigned mode; + register char *string; +{ + register unsigned mask; + register char *rwx = "rwxrwxrwx"; + + for (mask = 0400; mask != 0; mask >>= 1) + { + if (mode & mask) + *string++ = *rwx++; + else + { + *string++ = '-'; + rwx++; + } + } + + if (mode & S_ISUID) + if (string[-7] == 'x') + string[-7] = 's'; + else + string[-7] = 'S'; + if (mode & S_ISGID) + if (string[-4] == 'x') + string[-4] = 's'; + else + string[-4] = 'S'; + if (mode & S_ISVTX) + if (string[-1] == 'x') + string[-1] = 't'; + else + string[-1] = 'T'; + *string = '\0'; +} diff --git a/gnu/usr.bin/tar/mangle.c b/gnu/usr.bin/tar/mangle.c new file mode 100644 index 0000000..6281684 --- /dev/null +++ b/gnu/usr.bin/tar/mangle.c @@ -0,0 +1,270 @@ +/* mangle.c -- encode long filenames + Copyright (C) 1988, 1992 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include <stdio.h> +#include <sys/types.h> +#include <time.h> +time_t time (); + +#include "tar.h" +#include "port.h" + +void add_buffer (); +extern PTR ck_malloc (); +void finish_header (); +extern PTR init_buffer (); +extern char *quote_copy_string (); +extern char *get_buffer (); +char *un_quote_string (); + +extern union record *start_header (); + +extern struct stat hstat; /* Stat struct corresponding */ + +struct mangled + { + struct mangled *next; + int type; + char mangled[NAMSIZ]; + char *linked_to; + char normal[1]; + }; + + +/* Should use a hash table, etc. . */ +struct mangled *first_mangle; +int mangled_num = 0; + +#if 0 /* Deleted because there is now a better way to do all this */ + +char * +find_mangled (name) + char *name; +{ + struct mangled *munge; + + for (munge = first_mangle; munge; munge = munge->next) + if (!strcmp (name, munge->normal)) + return munge->mangled; + return 0; +} + + +#ifdef S_ISLNK +void +add_symlink_mangle (symlink, linkto, buffer) + char *symlink; + char *linkto; + char *buffer; +{ + struct mangled *munge, *kludge; + + munge = (struct mangled *) ck_malloc (sizeof (struct mangled) + strlen (symlink) + strlen (linkto) + 2); + if (!first_mangle) + first_mangle = munge; + else + { + for (kludge = first_mangle; kludge->next; kludge = kludge->next) + ; + kludge->next = munge; + } + munge->type = 1; + munge->next = 0; + strcpy (munge->normal, symlink); + munge->linked_to = munge->normal + strlen (symlink) + 1; + strcpy (munge->linked_to, linkto); + sprintf (munge->mangled, "@@MaNgLeD.%d", mangled_num++); + strncpy (buffer, munge->mangled, NAMSIZ); +} + +#endif + +void +add_mangle (name, buffer) + char *name; + char *buffer; +{ + struct mangled *munge, *kludge; + + munge = (struct mangled *) ck_malloc (sizeof (struct mangled) + strlen (name)); + if (!first_mangle) + first_mangle = munge; + else + { + for (kludge = first_mangle; kludge->next; kludge = kludge->next) + ; + kludge->next = munge; + } + munge->next = 0; + 
munge->type = 0; + strcpy (munge->normal, name); + sprintf (munge->mangled, "@@MaNgLeD.%d", mangled_num++); + strncpy (buffer, munge->mangled, NAMSIZ); +} + +void +write_mangled () +{ + struct mangled *munge; + struct stat hstat; + union record *header; + char *ptr1, *ptr2; + PTR the_buffer; + int size; + int bufsize; + + if (!first_mangle) + return; + the_buffer = init_buffer (); + for (munge = first_mangle, size = 0; munge; munge = munge->next) + { + ptr1 = quote_copy_string (munge->normal); + if (!ptr1) + ptr1 = munge->normal; + if (munge->type) + { + add_buffer (the_buffer, "Symlink ", 8); + add_buffer (the_buffer, ptr1, strlen (ptr1)); + add_buffer (the_buffer, " to ", 4); + + if (ptr2 = quote_copy_string (munge->linked_to)) + { + add_buffer (the_buffer, ptr2, strlen (ptr2)); + free (ptr2); + } + else + add_buffer (the_buffer, munge->linked_to, strlen (munge->linked_to)); + } + else + { + add_buffer (the_buffer, "Rename ", 7); + add_buffer (the_buffer, munge->mangled, strlen (munge->mangled)); + add_buffer (the_buffer, " to ", 4); + add_buffer (the_buffer, ptr1, strlen (ptr1)); + } + add_buffer (the_buffer, "\n", 1); + if (ptr1 != munge->normal) + free (ptr1); + } + + bzero (&hstat, sizeof (struct stat)); + hstat.st_atime = hstat.st_mtime = hstat.st_ctime = time (0); + ptr1 = get_buffer (the_buffer); + hstat.st_size = strlen (ptr1); + + header = start_header ("././@MaNgLeD_NaMeS", &hstat); + header->header.linkflag = LF_NAMES; + finish_header (header); + size = hstat.st_size; + header = findrec (); + bufsize = endofrecs ()->charptr - header->charptr; + + while (bufsize < size) + { + bcopy (ptr1, header->charptr, bufsize); + ptr1 += bufsize; + size -= bufsize; + userec (header + (bufsize - 1) / RECORDSIZE); + header = findrec (); + bufsize = endofrecs ()->charptr - header->charptr; + } + bcopy (ptr1, header->charptr, size); + bzero (header->charptr + size, bufsize - size); + userec (header + (size - 1) / RECORDSIZE); +} + +#endif + +void +extract_mangle (head) 
+ union record *head; +{ + char *buf; + char *fromtape; + char *to; + char *ptr, *ptrend; + char *nam1, *nam1end; + int size; + int copied; + + size = hstat.st_size; + buf = to = ck_malloc (size + 1); + buf[size] = '\0'; + while (size > 0) + { + fromtape = findrec ()->charptr; + if (fromtape == 0) + { + msg ("Unexpected EOF in mangled names!"); + return; + } + copied = endofrecs ()->charptr - fromtape; + if (copied > size) + copied = size; + bcopy (fromtape, to, copied); + to += copied; + size -= copied; + userec ((union record *) (fromtape + copied - 1)); + } + for (ptr = buf; *ptr; ptr = ptrend) + { + ptrend = index (ptr, '\n'); + *ptrend++ = '\0'; + + if (!strncmp (ptr, "Rename ", 7)) + { + nam1 = ptr + 7; + nam1end = index (nam1, ' '); + while (strncmp (nam1end, " to ", 4)) + { + nam1end++; + nam1end = index (nam1end, ' '); + } + *nam1end = '\0'; + if (ptrend[-2] == '/') + ptrend[-2] = '\0'; + un_quote_string (nam1end + 4); + if (rename (nam1, nam1end + 4)) + msg_perror ("Can't rename %s to %s", nam1, nam1end + 4); + else if (f_verbose) + msg ("Renamed %s to %s", nam1, nam1end + 4); + } +#ifdef S_ISLNK + else if (!strncmp (ptr, "Symlink ", 8)) + { + nam1 = ptr + 8; + nam1end = index (nam1, ' '); + while (strncmp (nam1end, " to ", 4)) + { + nam1end++; + nam1end = index (nam1end, ' '); + } + *nam1end = '\0'; + un_quote_string (nam1); + un_quote_string (nam1end + 4); + if (symlink (nam1, nam1end + 4) && (unlink (nam1end + 4) || symlink (nam1, nam1end + 4))) + msg_perror ("Can't symlink %s to %s", nam1, nam1end + 4); + else if (f_verbose) + msg ("Symlinkd %s to %s", nam1, nam1end + 4); + } +#endif + else + msg ("Unknown demangling command %s", ptr); + } +} diff --git a/gnu/usr.bin/tar/msd_dir.h b/gnu/usr.bin/tar/msd_dir.h new file mode 100644 index 0000000..06c7a64 --- /dev/null +++ b/gnu/usr.bin/tar/msd_dir.h @@ -0,0 +1,44 @@ +/* + * @(#)msd_dir.h 1.4 87/11/06 Public Domain. + * + * A public domain implementation of BSD directory routines for + * MS-DOS. 
Written by Michael Rendell ({uunet,utai}michael@garfield),
 * August 1987
 */

#define rewinddir(dirp) seekdir(dirp, 0L)

#define MAXNAMLEN       12

#ifdef __TURBOC__
typedef int ino_t;
typedef int dev_t;
#endif

struct dirent
  {
    ino_t d_ino;                /* a bit of a farce */
    int d_reclen;               /* more farce */
    int d_namlen;               /* length of d_name */
    char d_name[MAXNAMLEN + 1]; /* guarantee null termination */
  };

/* One node of a singly linked list of directory entries. */
struct _dircontents
  {
    char *_d_entry;
    struct _dircontents *_d_next;
  };

typedef struct _dirdesc
  {
    int dd_id;                  /* uniquely identify each open directory */
    long dd_loc;                /* where we are in the directory (which entry this is) */
    struct _dircontents *dd_contents;   /* pointer to contents of dir */
    struct _dircontents *dd_cp; /* pointer to current position */
  } DIR;

extern DIR *opendir ();
extern struct dirent *readdir ();
extern void seekdir ();
extern long telldir ();
extern void closedir ();
diff --git a/gnu/usr.bin/tar/names.c b/gnu/usr.bin/tar/names.c new file mode 100644 index 0000000..0de6a88 --- /dev/null +++ b/gnu/usr.bin/tar/names.c @@ -0,0 +1,149 @@
/* Look up user and/or group names.
   Copyright (C) 1988, 1992 Free Software Foundation

This file is part of GNU Tar.

GNU Tar is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Tar is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Tar; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/*
 * Look up user and/or group names.
+ * + * This file should be modified for non-unix systems to do something + * reasonable. + */ + +#include <sys/types.h> +#include "tar.h" +#include "port.h" + +#ifndef NONAMES +/* Whole module goes away if NONAMES defined. Otherwise... */ +#include <stdio.h> +#include <pwd.h> +#include <grp.h> + +static int saveuid = -993; +static char saveuname[TUNMLEN]; +static int my_uid = -993; + +static int savegid = -993; +static char savegname[TGNMLEN]; +static int my_gid = -993; + +#define myuid ( my_uid < 0? (my_uid = getuid()): my_uid ) +#define mygid ( my_gid < 0? (my_gid = getgid()): my_gid ) + +/* + * Look up a user or group name from a uid/gid, maintaining a cache. + * FIXME, for now it's a one-entry cache. + * FIXME2, the "-993" is to reduce the chance of a hit on the first lookup. + * + * This is ifdef'd because on Suns, it drags in about 38K of "yellow + * pages" code, roughly doubling the program size. Thanks guys. + */ +void +finduname (uname, uid) + char uname[TUNMLEN]; + int uid; +{ + struct passwd *pw; +#ifndef HAVE_GETPWUID + extern struct passwd *getpwuid (); +#endif + + if (uid != saveuid) + { + saveuid = uid; + saveuname[0] = '\0'; + pw = getpwuid (uid); + if (pw) + strncpy (saveuname, pw->pw_name, TUNMLEN); + } + strncpy (uname, saveuname, TUNMLEN); +} + +int +finduid (uname) + char uname[TUNMLEN]; +{ + struct passwd *pw; + extern struct passwd *getpwnam (); + + if (uname[0] != saveuname[0] /* Quick test w/o proc call */ + || 0 != strncmp (uname, saveuname, TUNMLEN)) + { + strncpy (saveuname, uname, TUNMLEN); + pw = getpwnam (uname); + if (pw) + { + saveuid = pw->pw_uid; + } + else + { + saveuid = myuid; + } + } + return saveuid; +} + + +void +findgname (gname, gid) + char gname[TGNMLEN]; + int gid; +{ + struct group *gr; +#ifndef HAVE_GETGRGID + extern struct group *getgrgid (); +#endif + + if (gid != savegid) + { + savegid = gid; + savegname[0] = '\0'; + (void) setgrent (); + gr = getgrgid (gid); + if (gr) + strncpy (savegname, gr->gr_name, TGNMLEN); + 
} + (void) strncpy (gname, savegname, TGNMLEN); +} + + +int +findgid (gname) + char gname[TUNMLEN]; +{ + struct group *gr; + extern struct group *getgrnam (); + + if (gname[0] != savegname[0] /* Quick test w/o proc call */ + || 0 != strncmp (gname, savegname, TUNMLEN)) + { + strncpy (savegname, gname, TUNMLEN); + gr = getgrnam (gname); + if (gr) + { + savegid = gr->gr_gid; + } + else + { + savegid = mygid; + } + } + return savegid; +} + +#endif diff --git a/gnu/usr.bin/tar/open3.h b/gnu/usr.bin/tar/open3.h new file mode 100644 index 0000000..c1c0e59 --- /dev/null +++ b/gnu/usr.bin/tar/open3.h @@ -0,0 +1,67 @@ +/* Defines for Sys V style 3-argument open call. + Copyright (C) 1988 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* + * open3.h -- #defines for the various flags for the Sys V style 3-argument + * open() call. On BSD or System 5, the system already has this in an + * include file. This file is needed for V7 and MINIX systems for the + * benefit of open3() in port.c, a routine that emulates the 3-argument + * call using system calls available on V7/MINIX. + * + * This file is needed by PD tar even if we aren't using the + * emulator, since the #defines for O_WRONLY, etc. are used in + * a couple of places besides the open() calls, (e.g. in the assignment + * to openflag in extract.c). 
We just #include this rather than + * #ifdef them out. + * + * Written 6/10/87 by rmtodd@uokmax (Richard Todd). + * + * The names have been changed by John Gilmore, 31 July 1987, since + * Richard called it "bsdopen", and really this change was introduced in + * AT&T Unix systems before BSD picked it up. + */ + +/* Only one of the next three should be specified */ +#define O_RDONLY 0 /* only allow read */ +#define O_WRONLY 1 /* only allow write */ +#define O_RDWR 2 /* both are allowed */ + +/* The rest of these can be OR-ed in to the above. */ +/* + * O_NDELAY isn't implemented by the emulator. It's only useful (to tar) on + * systems that have named pipes anyway; it prevents tar's hanging by + * opening a named pipe. We #ifndef it because some systems already have + * it defined. + */ +#ifndef O_NDELAY +#define O_NDELAY 4 /* don't block on opening devices that would + * block on open -- ignored by emulator. */ +#endif +#define O_CREAT 8 /* create file if needed */ +#define O_EXCL 16 /* file cannot already exist */ +#define O_TRUNC 32 /* truncate file on open */ +#define O_APPEND 64 /* always write at end of file -- ignored by emul */ + +#ifdef EMUL_OPEN3 +/* + * make emulation transparent to rest of file -- redirect all open() calls + * to our routine + */ +#define open open3 +#endif diff --git a/gnu/usr.bin/tar/pathmax.h b/gnu/usr.bin/tar/pathmax.h new file mode 100644 index 0000000..aeba9f7 --- /dev/null +++ b/gnu/usr.bin/tar/pathmax.h @@ -0,0 +1,53 @@ +/* Define PATH_MAX somehow. Requires sys/types.h. + Copyright (C) 1992 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _PATHMAX_H +#define _PATHMAX_H + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define + PATH_MAX but might cause redefinition warnings when sys/param.h is + later included (as on MORE/BSD 4.3). */ +#if defined(_POSIX_VERSION) || (defined(HAVE_LIMITS_H) && defined(USG)) +#include <limits.h> +#endif + +#ifndef _POSIX_PATH_MAX +#define _POSIX_PATH_MAX 255 +#endif + +#if !defined(PATH_MAX) && defined(_PC_PATH_MAX) +#define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX)) +#endif + +/* Don't include sys/param.h if it already has been. */ +#if !defined(PATH_MAX) && !defined(MAXPATHLEN) && !defined(__MSDOS__) +#include <sys/param.h> +#endif + +#if !defined(PATH_MAX) && defined(MAXPATHLEN) +#define PATH_MAX MAXPATHLEN +#endif + +#ifndef PATH_MAX +#define PATH_MAX _POSIX_PATH_MAX +#endif + +#endif /* _PATHMAX_H */ diff --git a/gnu/usr.bin/tar/port.c b/gnu/usr.bin/tar/port.c new file mode 100644 index 0000000..10ec32e --- /dev/null +++ b/gnu/usr.bin/tar/port.c @@ -0,0 +1,1256 @@ +/* Supporting routines which may sometimes be missing. + Copyright (C) 1988, 1992 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <stdio.h> +#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#include "tar.h" +#include "port.h" + +extern long baserec; + +/* All machine-dependent #ifdefs should appear here, instead of + being scattered through the file. For UN*X systems, it is better to + figure out what is needed in the configure script, for most of the + features. */ + +#ifdef __MSDOS__ +char TTY_NAME[] = "con"; +#define HAVE_STRSTR +#define HAVE_RENAME +#define HAVE_MKDIR +#else +char TTY_NAME[] = "/dev/tty"; +#endif + +/* End of system-dependent #ifdefs */ + + +#ifndef HAVE_VALLOC +/* + * valloc() does a malloc() on a page boundary. On some systems, + * this can make large block I/O more efficient. + */ +char * +valloc (size) + unsigned size; +{ + return (malloc (size)); +} + +#endif /* !HAVE_VALLOC */ + +#ifndef HAVE_MKDIR +/* + * Written by Robert Rother, Mariah Corporation, August 1985. + * + * If you want it, it's yours. All I ask in return is that if you + * figure out how to do this in a Bourne Shell script you send me + * a copy. + * sdcsvax!rmr or rmr@uscd + * + * Severely hacked over by John Gilmore to make a 4.2BSD compatible + * subroutine. 11Mar86; hoptoad!gnu + * + * Modified by rmtodd@uokmax 6-28-87 -- when making an already existing dir, + * subroutine didn't return EEXIST. It does now. + */ + +/* + * Make a directory. 
+ */ +int +mkdir (dpath, dmode) + char *dpath; + int dmode; +{ + int cpid, status; + struct stat statbuf; + + if (stat (dpath, &statbuf) == 0) + { + errno = EEXIST; /* Stat worked, so it already exists */ + return -1; + } + + /* If stat fails for a reason other than non-existence, return error */ + if (errno != ENOENT) + return -1; + + switch (cpid = fork ()) + { + + case -1: /* Error in fork() */ + return (-1); /* Errno is set already */ + + case 0: /* Child process */ + /* + * Cheap hack to set mode of new directory. Since this + * child process is going away anyway, we zap its umask. + * FIXME, this won't suffice to set SUID, SGID, etc. on this + * directory. Does anybody care? + */ + status = umask (0); /* Get current umask */ + status = umask (status | (0777 & ~dmode)); /* Set for mkdir */ + execl ("/bin/mkdir", "mkdir", dpath, (char *) 0); + _exit (-1); /* Can't exec /bin/mkdir */ + + default: /* Parent process */ + while (cpid != wait (&status)); /* Wait for kid to finish */ + } + + if (WIFSIGNALED (status) || WEXITSTATUS (status) != 0) + { + errno = EIO; /* We don't know why, but */ + return -1; /* /bin/mkdir failed */ + } + + return 0; +} + +int +rmdir (dpath) + char *dpath; +{ + int cpid, status; + struct stat statbuf; + + if (stat (dpath, &statbuf) != 0) + { + /* Stat just set errno. We don't have to */ + return -1; + } + + switch (cpid = fork ()) + { + + case -1: /* Error in fork() */ + return (-1); /* Errno is set already */ + + case 0: /* Child process */ + execl ("/bin/rmdir", "rmdir", dpath, (char *) 0); + _exit (-1); /* Can't exec /bin/mkdir */ + + default: /* Parent process */ + while (cpid != wait (&status)); /* Wait for kid to finish */ + } + + if (WIFSIGNALED (status) || WEXITSTATUS (status) != 0) + { + errno = EIO; /* We don't know why, but */ + return -1; /* /bin/mkdir failed */ + } + + return 0; +} + +#endif /* !HAVE_MKDIR */ + +#ifndef HAVE_RENAME +/* Rename file FROM to file TO. + Return 0 if successful, -1 if not. 
#ifdef minix
/* Minix has bcopy but not bzero, and no memset.  Thanks, Andy. */
void
bzero (s1, n)
     register char *s1;
     register int n;
{
  while (n--)
    *s1++ = '\0';
}

/* It also has no bcmp() */
int
bcmp (s1, s2, n)
     register char *s1, *s2;
     register int n;
{
  for (; n--; ++s1, ++s2)
    {
      if (*s1 != *s2)
	return *s1 - *s2;
    }
  return 0;
}

/*
 * Groan, Minix doesn't have execlp either!
 *
 * execlp(file,arg0,arg1...argn,(char *)NULL)
 * exec a program, automatically searching for the program through
 * all the directories on the PATH.
 *
 * This version is naive about variable argument lists, it assumes
 * a straightforward C calling sequence.  If your system has odd stacks
 * *and* doesn't have execlp, YOU get to fix it.
 *
 * Returns only on failure: -1 with errno set (ENOENT when no PATH
 * component held a runnable regular file, ENOMEM when the work buffer
 * could not be allocated).
 */
int
execlp (filename, arg0)
     char *filename, *arg0;
{
  register char *p, *path;
  register char *fnbuffer;
  char **argstart = &arg0;
  struct stat statbuf;
  extern char **environ;

  if ((p = getenv ("PATH")) == NULL)
    {
      /* couldn't find path variable -- try to exec given filename */
      return execve (filename, argstart, environ);
    }

  /*
   * make a place to build the filename.  The longest name we can build
   * is the whole of PATH (when it holds a single component), a "/",
   * FILENAME, and the terminating NUL -- so both extra bytes are
   * needed, not just one.
   */
  fnbuffer = malloc (strlen (p) + 1 + strlen (filename) + 1);
  if (fnbuffer == NULL)
    {
      errno = ENOMEM;
      return -1;
    }

  /*
   * try each component of the path to see if the file's there
   * and executable.
   */
  for (path = p; path; path = p)
    {
      /* construct full path name to try */
      if ((p = index (path, ':')) == NULL)
	{
	  strcpy (fnbuffer, path);
	}
      else
	{
	  strncpy (fnbuffer, path, p - path);
	  fnbuffer[p - path] = '\0';
	  p++;			/* Skip : for next time */
	}
      if (strlen (fnbuffer) != 0)
	strcat (fnbuffer, "/");
      strcat (fnbuffer, filename);

      /* check to see if file is there and is a normal file */
      if (stat (fnbuffer, &statbuf) < 0)
	{
	  if (errno == ENOENT)
	    continue;		/* file not there,keep on looking */
	  else
	    goto fail;		/* failed for some reason, return */
	}
      if (!S_ISREG (statbuf.st_mode))
	continue;

      if (execve (fnbuffer, argstart, environ) < 0
	  && errno != ENOENT
	  && errno != ENOEXEC)
	{
	  /* failed, for some other reason besides "file
	   * not found" or "not a.out format"
	   */
	  goto fail;
	}

      /*
       * If we got error ENOEXEC, the file is executable but is
       * not an object file.  Try to execute it as a shell script,
       * returning error if we can't execute /bin/sh.
       *
       * FIXME, this code is broken in several ways.  Shell
       * scripts should not in general be executed by the user's
       * SHELL variable program.  On more mature systems, the
       * script can specify with #!/bin/whatever.  Also, this
       * code clobbers argstart[-1] if the exec of the shell
       * fails.
       */
      if (errno == ENOEXEC)
	{
	  char *shell;

	  /* Try to execute command "sh arg0 arg1 ..." */
	  if ((shell = getenv ("SHELL")) == NULL)
	    shell = "/bin/sh";
	  argstart[-1] = shell;
	  argstart[0] = fnbuffer;
	  execve (shell, &argstart[-1], environ);
	  goto fail;		/* Exec didn't work */
	}

      /*
       * If we succeeded, the execve() doesn't return, so we
       * can only be here is if the file hasn't been found yet.
       * Try the next place on the path.
       */
    }

  /* all attempts failed to locate the file.  Give up. */
  errno = ENOENT;

fail:
  free (fnbuffer);
  return -1;
}

#endif /* minix */
+ */ + + /* + * See if the file exists and is accessible in the requested mode. + * + * Strictly speaking we shouldn't be using access, since access checks + * against real uid, and the open call should check against euid. + * Most cases real uid == euid, so it won't matter. FIXME. + * FIXME, the construction "flags & 3" and the modes table depends + * on the specific integer values of the O_XXX #define's. Foo! + */ + if (access (path, modes[flags & 3]) < 0) + { + if (errno == ENOENT) + { + /* the file does not exist */ + exists = 0; + } + else + { + /* probably permission violation */ + if (flags & O_EXCL) + { + /* Oops, the file exists, we didn't want it. */ + /* No matter what the error, claim EEXIST. */ + errno = EEXIST; + } + return -1; + } + } + + /* if we have the O_CREAT bit set, check for O_EXCL */ + if (flags & O_CREAT) + { + if ((flags & O_EXCL) && exists) + { + /* Oops, the file exists and we didn't want it to. */ + errno = EEXIST; + return -1; + } + /* + * If the file doesn't exist, be sure to call creat() so that + * it will be created with the proper mode. + */ + if (!exists) + call_creat = 1; + } + else + { + /* If O_CREAT isn't set and the file doesn't exist, error. */ + if (!exists) + { + errno = ENOENT; + return -1; + } + } + + /* + * If the O_TRUNC flag is set and the file exists, we want to call + * creat() anyway, since creat() guarantees that the file will be + * truncated and open()-for-writing doesn't. + * (If the file doesn't exist, we're calling creat() anyway and the + * file will be created with zero length.) + */ + if ((flags & O_TRUNC) && exists) + call_creat = 1; + /* actually do the call */ + if (call_creat) + { + /* + * call creat. May have to close and reopen the file if we + * want O_RDONLY or O_RDWR access -- creat() only gives + * O_WRONLY. 
+ */ + fd = creat (path, mode); + if (fd < 0 || (flags & O_WRONLY)) + return fd; + if (close (fd) < 0) + return -1; + /* Fall out to reopen the file we've created */ + } + + /* + * calling old open, we strip most of the new flags just in case. + */ + return open (path, flags & (O_RDONLY | O_WRONLY | O_RDWR | O_BINARY)); +} + +#endif /* EMUL_OPEN3 */ + +#ifndef HAVE_MKNOD +#ifdef __MSDOS__ +typedef int dev_t; +#endif +/* Fake mknod by complaining */ +int +mknod (path, mode, dev) + char *path; + unsigned short mode; + dev_t dev; +{ + int fd; + + errno = ENXIO; /* No such device or address */ + return -1; /* Just give an error */ +} + +/* Fake links by copying */ +int +link (path1, path2) + char *path1; + char *path2; +{ + char buf[256]; + int ifd, ofd; + int nrbytes; + int nwbytes; + + fprintf (stderr, "%s: %s: cannot link to %s, copying instead\n", + tar, path1, path2); + if ((ifd = open (path1, O_RDONLY | O_BINARY)) < 0) + return -1; + if ((ofd = creat (path2, 0666)) < 0) + return -1; + setmode (ofd, O_BINARY); + while ((nrbytes = read (ifd, buf, sizeof (buf))) > 0) + { + if ((nwbytes = write (ofd, buf, nrbytes)) != nrbytes) + { + nrbytes = -1; + break; + } + } + /* Note use of "|" rather than "||" below: we want to close + * the files even if an error occurs. + */ + if ((nrbytes < 0) | (0 != close (ifd)) | (0 != close (ofd))) + { + unlink (path2); + return -1; + } + return 0; +} + +/* everyone owns everything on MS-DOS (or is it no one owns anything?) */ +int +chown (path, uid, gid) + char *path; + int uid; + int gid; +{ + return 0; +} + +int +geteuid () +{ + return 0; +} + +#endif /* !HAVE_MKNOD */ + +#ifdef __TURBOC__ +#include <time.h> +#include <fcntl.h> +#include <io.h> + +struct utimbuf +{ + time_t actime; /* Access time. */ + time_t modtime; /* Modification time. 
/* Stash argv[0] here so panic will know what the program is called */
char *myname = 0;

/* Print S on stderr, prefixed by "myname:" when myname is set and
   followed by a newline, then exit with status 12.  Does not return.

   S is written verbatim with fputs(): it is a message, not a format
   string, so a '%' in it must not be interpreted by printf.  */
void
panic (s)
     char *s;
{
  if (myname)
    fprintf (stderr, "%s:", myname);
  fputs (s, stderr);
  putc ('\n', stderr);
  exit (12);
}
/* Sort a singly linked list of N nodes starting at LIST and return the
   new head.  OFF is the byte offset, inside each node, of its "next"
   pointer; CMP is called with two node pointers and returns <0, 0 or >0
   like strcmp.  The list is re-linked in place; the last node of the
   result has a null "next" pointer whenever N > 1.

   An empty list (N == 0) is returned unchanged; without that guard the
   unsigned computation (n - 1) / 2 wraps and the split loop walks off
   the end of the list.  */
char *
merge_sort (list, n, off, cmp)
     char *list;
     int (*cmp) ();
     unsigned n;
     int off;
{
  char *ret;

  char *alist, *blist;
  unsigned alength, blength;

  char *tptr;
  int tmp;
  char **prev;
#define NEXTOF(ptr)	(* ((char **)(((char *)(ptr))+off) ) )
  if (n <= 1)
    return list;
  if (n == 2)
    {
      if ((*cmp) (list, NEXTOF (list)) > 0)
	{
	  ret = NEXTOF (list);
	  NEXTOF (ret) = list;
	  NEXTOF (list) = 0;
	  return ret;
	}
      return list;
    }

  /* Cut the list into a front half of (n + 1) / 2 nodes and a back
     half of n / 2 nodes.  */
  alist = list;
  alength = (n + 1) / 2;
  blength = n / 2;
  for (tptr = list, tmp = (n - 1) / 2; tmp; tptr = NEXTOF (tptr), tmp--)
    ;
  blist = NEXTOF (tptr);
  NEXTOF (tptr) = 0;

  alist = merge_sort (alist, alength, off, cmp);
  blist = merge_sort (blist, blength, off, cmp);

  /* Merge: PREV always points at the link that must receive the next
     node of the result.  */
  prev = &ret;
  for (; alist && blist;)
    {
      if ((*cmp) (alist, blist) < 0)
	{
	  tptr = NEXTOF (alist);
	  *prev = alist;
	  prev = &(NEXTOF (alist));
	  alist = tptr;
	}
      else
	{
	  tptr = NEXTOF (blist);
	  *prev = blist;
	  prev = &(NEXTOF (blist));
	  blist = tptr;
	}
    }
  if (alist)
    *prev = alist;
  else
    *prev = blist;

  return ret;
}

#include <ctype.h>

/* Quote_copy_string is like quote_string, but instead of modifying the
   string in place, it malloc-s a copy of the string, and returns that.
   If the string does not have to be quoted, it returns the NULL string.
   The allocated copy can, of course, be freed with free() after the
   caller is done with it.

   A backslash becomes "\\"; newline, tab, form feed, backspace, return
   and DEL become \n \t \f \b \r \?; any other byte that isprint()
   rejects becomes a three-digit octal escape.  NULL is also returned
   if the copy cannot be allocated.  */
char *
quote_copy_string (string)
     char *string;
{
  char *from_here;
  char *to_there = 0;
  char *copy_buf = 0;
  int c;
  int copying = 0;

  from_here = string;
  while (*from_here)
    {
      /* Fetch the byte as unsigned: a negative value is not a valid
         argument for isprint(), and would also turn the octal escape
         below into garbage for bytes of 0200 and up.  */
      c = (unsigned char) *from_here++;

      if (c != '\\' && isprint (c))
	{
	  /* Needs no quoting; store it only once a copy exists.  */
	  if (copying)
	    *to_there++ = c;
	  continue;
	}

      if (!copying)
	{
	  int n;

	  /* First byte that needs quoting: allocate for the worst case
	     (N bytes already scanned, 4 for this byte, 4 for each byte
	     still to come, and the NUL) and copy the clean prefix.  */
	  n = (from_here - string) - 1;
	  copying++;
	  copy_buf = (char *) malloc (n + 5 + strlen (from_here) * 4);
	  if (!copy_buf)
	    return 0;
	  bcopy (string, copy_buf, n);
	  to_there = copy_buf + n;
	}

      *to_there++ = '\\';
      if (c == '\\')
	*to_there++ = '\\';
      else if (c == '\n')
	*to_there++ = 'n';
      else if (c == '\t')
	*to_there++ = 't';
      else if (c == '\f')
	*to_there++ = 'f';
      else if (c == '\b')
	*to_there++ = 'b';
      else if (c == '\r')
	*to_there++ = 'r';
      else if (c == '\177')
	*to_there++ = '?';
      else
	{
	  to_there[0] = (c >> 6) + '0';
	  to_there[1] = ((c >> 3) & 07) + '0';
	  to_there[2] = (c & 07) + '0';
	  to_there += 3;
	}
    }
  if (copying)
    {
      *to_there = '\0';
      return copy_buf;
    }
  return (char *) 0;
}
/* Un_quote_string takes a quoted c-string (like those produced by
   quote_string or quote_copy_string) and turns it back into the
   un-quoted original.  This is done in place.

   Returns STRING, or 0 if an unknown escape (or a lone backslash at
   the end of the string) was found; in that case the offending
   characters are kept as they are and the rest is still converted.  */

/* There is no un-quote-copy-string.  Write it yourself */

char *
un_quote_string (string)
     char *string;
{
  char *ret;
  char *from_here;
  char *to_there;
  int tmp;

  ret = string;
  to_there = string;
  from_here = string;
  while (*from_here)
    {
      if (*from_here != '\\')
	{
	  /* Nothing is written until the first escape has made the
	     output shorter than the input.  */
	  if (from_here != to_there)
	    *to_there++ = *from_here++;
	  else
	    from_here++, to_there++;
	  continue;
	}
      switch (*++from_here)
	{
	case '\0':
	  /* Lone backslash at the end of the string.  Keep it, flag the
	     error, and leave FROM_HERE on the terminator so that the
	     loop stops instead of scanning past the end.  */
	  ret = 0;
	  *to_there++ = '\\';
	  break;
	case '\\':
	  *to_there++ = *from_here++;
	  break;
	case 'n':
	  *to_there++ = '\n';
	  from_here++;
	  break;
	case 't':
	  *to_there++ = '\t';
	  from_here++;
	  break;
	case 'f':
	  *to_there++ = '\f';
	  from_here++;
	  break;
	case 'b':
	  *to_there++ = '\b';
	  from_here++;
	  break;
	case 'r':
	  *to_there++ = '\r';
	  from_here++;
	  break;
	case '?':
	  *to_there++ = 0177;
	  from_here++;
	  break;
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  /* One to three octal digits.  */
	  tmp = *from_here - '0';
	  from_here++;
	  if (*from_here < '0' || *from_here > '7')
	    {
	      *to_there++ = tmp;
	      break;
	    }
	  tmp = tmp * 8 + *from_here - '0';
	  from_here++;
	  if (*from_here < '0' || *from_here > '7')
	    {
	      *to_there++ = tmp;
	      break;
	    }
	  tmp = tmp * 8 + *from_here - '0';
	  from_here++;
	  /* Advance past the stored byte, as the shorter forms do;
	     otherwise the next character would overwrite it.  */
	  *to_there++ = tmp;
	  break;
	default:
	  ret = 0;
	  *to_there++ = '\\';
	  *to_there++ = *from_here++;
	  break;
	}
    }
  if (*to_there)
    *to_there++ = '\0';
  return ret;
}
+ * Also, we inline the first char for speed. + * The ++ on scan has been moved down for optimization. + */ + firstc = *wanted; + len = strlen (wanted); + for (scan = s; *scan != firstc || strncmp (scan, wanted, len) != 0;) + if (*scan++ == '\0') + return (char *) 0; + return scan; +} + +#endif /* !HAVE_STRSTR */ + +#ifndef HAVE_FTRUNCATE + +#ifdef F_CHSIZE +int +ftruncate (fd, length) + int fd; + off_t length; +{ + return fcntl (fd, F_CHSIZE, length); +} + +#else /* !F_CHSIZE */ +#ifdef F_FREESP +/* code courtesy of William Kucharski, kucharsk@Solbourne.com */ + +int +ftruncate (fd, length) + int fd; /* file descriptor */ + off_t length; /* length to set file to */ +{ + struct flock fl; + + fl.l_whence = 0; + fl.l_len = 0; + fl.l_start = length; + fl.l_type = F_WRLCK; /* write lock on file space */ + + /* + * This relies on the UNDOCUMENTED F_FREESP argument to + * fcntl(2), which truncates the file so that it ends at the + * position indicated by fl.l_start. + * + * Will minor miracles never cease? + */ + + if (fcntl (fd, F_FREESP, &fl) < 0) + return -1; + + return 0; +} + +#else /* !F_FREESP */ + +int +ftruncate (fd, length) + int fd; + off_t length; +{ + errno = EIO; + return -1; +} + +#endif /* !F_FREESP */ +#endif /* !F_CHSIZE */ +#endif /* !HAVE_FTRUNCATE */ + + +extern FILE *msg_file; + +#if defined (HAVE_VPRINTF) && __STDC__ +#include <stdarg.h> + +void +msg (char *str,...) +{ + va_list args; + + va_start (args, str); + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + vfprintf (stderr, str, args); + va_end (args); + putc ('\n', stderr); + fflush (stderr); +} + +void +msg_perror (char *str,...) 
+{ + va_list args; + int save_e; + + save_e = errno; + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + va_start (args, str); + vfprintf (stderr, str, args); + va_end (args); + errno = save_e; + perror (" "); + fflush (stderr); +} + +#endif /* HAVE_VPRINTF and __STDC__ */ + +#if defined(HAVE_VPRINTF) && !__STDC__ +#include <varargs.h> +void +msg (str, va_alist) + char *str; + va_dcl +{ + va_list args; + + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + va_start (args); + vfprintf (stderr, str, args); + va_end (args); + putc ('\n', stderr); + fflush (stderr); +} + +void +msg_perror (str, va_alist) + char *str; + va_dcl +{ + va_list args; + int save_e; + + save_e = errno; + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + va_start (args); + vfprintf (stderr, str, args); + va_end (args); + errno = save_e; + perror (" "); + fflush (stderr); +} + +#endif /* HAVE_VPRINTF and not __STDC__ */ + +#if !defined(HAVE_VPRINTF) && defined(HAVE_DOPRNT) +void +msg (str, args) + char *str; + int args; +{ + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + _doprnt (str, &args, stderr); + putc ('\n', stderr); + fflush (stderr); +} + +void +msg_perror (str, args) + char *str; + int args; +{ + int save_e; + + save_e = errno; + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + _doprnt (str, &args, stderr); + errno = save_e; + perror (" "); + fflush (stderr); +} + +#endif /* !HAVE_VPRINTF and HAVE_DOPRNT */ + +#if !defined(HAVE_VPRINTF) && !defined(HAVE_DOPRNT) +void +msg (str, a1, a2, a3, a4, a5, a6) + char *str; +{ + fflush (msg_file); + fprintf 
(stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + fprintf (stderr, str, a1, a2, a3, a4, a5, a6); + putc ('\n', stderr); + fflush (stderr); +} + +void +msg_perror (str, a1, a2, a3, a4, a5, a6) + char *str; +{ + int save_e; + + save_e = errno; + fflush (msg_file); + fprintf (stderr, "%s: ", tar); + if (f_sayblock) + fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block)); + fprintf (stderr, str, a1, a2, a3, a4, a5, a6); + fprintf (stderr, ": "); + errno = save_e; + perror (" "); +} + +#endif /* !HAVE_VPRINTF and !HAVE_DOPRNT */ diff --git a/gnu/usr.bin/tar/port.h b/gnu/usr.bin/tar/port.h new file mode 100644 index 0000000..4e65a9a --- /dev/null +++ b/gnu/usr.bin/tar/port.h @@ -0,0 +1,215 @@ +/* Portability declarations. Requires sys/types.h. + Copyright (C) 1988, 1992 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* AIX requires this to be the first thing in the file. 
*/ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include <alloca.h> +#else /* not HAVE_ALLOCA_H */ +#ifdef _AIX + #pragma alloca +#else /* not _AIX */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#include "pathmax.h" + +#ifdef _POSIX_VERSION +#include <sys/wait.h> +#else /* !_POSIX_VERSION */ +#define WIFSTOPPED(w) (((w) & 0xff) == 0x7f) +#define WIFSIGNALED(w) (((w) & 0xff) != 0x7f && ((w) & 0xff) != 0) +#define WIFEXITED(w) (((w) & 0xff) == 0) + +#define WSTOPSIG(w) (((w) >> 8) & 0xff) +#define WTERMSIG(w) ((w) & 0x7f) +#define WEXITSTATUS(w) (((w) >> 8) & 0xff) +#endif /* _POSIX_VERSION */ + +/* nonstandard */ +#ifndef WIFCOREDUMPED +#define WIFCOREDUMPED(w) (((w) & 0x80) != 0) +#endif + +#ifdef __MSDOS__ +/* missing things from sys/stat.h */ +#define S_ISUID 0 +#define S_ISGID 0 +#define S_ISVTX 0 + +/* device stuff */ +#define makedev(ma, mi) ((ma << 8) | mi) +#define major(dev) (dev) +#define minor(dev) (dev) +typedef long off_t; +#endif /* __MSDOS__ */ + +#if defined(__STDC__) || defined(__TURBOC__) +#define PTR void * +#else +#define PTR char * +#define const +#endif + +/* Since major is a function on SVR4, we can't just use `ifndef major'. */ +#ifdef major /* Might be defined in sys/types.h. 
*/ +#define HAVE_MAJOR +#endif + +#if !defined(HAVE_MAJOR) && defined(MAJOR_IN_MKDEV) +#include <sys/mkdev.h> +#define HAVE_MAJOR +#endif + +#if !defined(HAVE_MAJOR) && defined(MAJOR_IN_SYSMACROS) +#include <sys/sysmacros.h> +#define HAVE_MAJOR +#endif + +#ifndef HAVE_MAJOR +#define major(dev) (((dev) >> 8) & 0xff) +#define minor(dev) ((dev) & 0xff) +#define makedev(maj, min) (((maj) << 8) | (min)) +#endif +#undef HAVE_MAJOR + +#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) +#include <string.h> +#if !defined(__MSDOS__) && !defined(STDC_HEADERS) +#include <memory.h> +#endif +#ifdef index +#undef index +#endif +#ifdef rindex +#undef rindex +#endif +#define index strchr +#define rindex strrchr +#define bcopy(s, d, n) memcpy(d, s, n) +#define bzero(s, n) memset(s, 0, n) +#define bcmp memcmp +#else +#include <strings.h> +#endif + +#if defined(STDC_HEADERS) +#include <stdlib.h> +#else +char *malloc (), *realloc (); +char *getenv (); +#endif + +#ifndef _POSIX_VERSION +#ifdef __MSDOS__ +#include <io.h> +#else /* !__MSDOS__ */ +off_t lseek (); +#endif /* !__MSDOS__ */ +char *getcwd (); +#endif /* !_POSIX_VERSION */ + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef O_BINARY +#define O_BINARY 0 +#endif +#ifndef O_CREAT +#define O_CREAT 0 +#endif +#ifndef O_NDELAY +#define O_NDELAY 0 +#endif +#ifndef O_RDONLY +#define O_RDONLY 0 +#endif +#ifndef O_RDWR +#define O_RDWR 2 +#endif + +#include <sys/stat.h> +#ifndef S_ISREG /* Doesn't have POSIX.1 stat stuff. 
*/ +#define mode_t unsigned short +#endif +#if !defined(S_ISBLK) && defined(S_IFBLK) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#endif +#if !defined(S_ISCHR) && defined(S_IFCHR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#endif +#if !defined(S_ISDIR) && defined(S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif +#if !defined(S_ISFIFO) && defined(S_IFIFO) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#define mkfifo(path, mode) (mknod ((path), (mode) | S_IFIFO, 0)) +#endif +#if !defined(S_ISLNK) && defined(S_IFLNK) +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#endif +#if !defined(S_ISSOCK) && defined(S_IFSOCK) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) +#endif +#if !defined(S_ISMPB) && defined(S_IFMPB) /* V7 */ +#define S_ISMPB(m) (((m) & S_IFMT) == S_IFMPB) +#define S_ISMPC(m) (((m) & S_IFMT) == S_IFMPC) +#endif +#if !defined(S_ISNWK) && defined(S_IFNWK) /* HP/UX */ +#define S_ISNWK(m) (((m) & S_IFMT) == S_IFNWK) +#endif +#if !defined(S_ISCTG) && defined(S_IFCTG) /* contiguous file */ +#define S_ISCTG(m) (((m) & S_IFMT) == S_IFCTG) +#endif +#if !defined(S_ISVTX) +#define S_ISVTX 0001000 +#endif + +#ifdef __MSDOS__ +#include "msd_dir.h" +#define NLENGTH(direct) ((direct)->d_namlen) + +#else /* not __MSDOS__ */ + +#if defined(DIRENT) || defined(_POSIX_VERSION) +#include <dirent.h> +#define NLENGTH(direct) (strlen((direct)->d_name)) +#else /* not (DIRENT or _POSIX_VERSION) */ +#define dirent direct +#define NLENGTH(direct) ((direct)->d_namlen) +#ifdef SYSNDIR +#include <sys/ndir.h> +#endif /* SYSNDIR */ +#ifdef SYSDIR +#include <sys/dir.h> +#endif /* SYSDIR */ +#ifdef NDIR +#include <ndir.h> +#endif /* NDIR */ +#endif /* DIRENT or _POSIX_VERSION */ + +#endif /* not __MSDOS__ */ diff --git a/gnu/usr.bin/tar/rmt.h b/gnu/usr.bin/tar/rmt.h new file mode 100644 index 0000000..2155223 --- /dev/null +++ b/gnu/usr.bin/tar/rmt.h @@ -0,0 
/* Definitions for communicating with a remote tape drive.
   Copyright (C) 1988, 1992 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* This header maps each rmtXXX name either straight onto the local
   system call (NO_REMOTE) or onto a macro that chooses between the
   local call and the matching __rmt_XXX routine.

   Every macro argument is parenthesized in the expansions below so
   that an expression argument (for example `cond ? a : b') keeps its
   meaning.  Arguments may still be evaluated more than once, so do
   not pass expressions with side effects.  */

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#if !defined(_POSIX_VERSION)
#ifdef __MSDOS__
#include <io.h>
#else /* !__MSDOS__ */
extern off_t lseek ();
#endif /* __MSDOS__ */
#endif /* _POSIX_VERSION */

#ifdef NO_REMOTE
#define _isrmt(f)	0
#define rmtopen		open
#define rmtaccess	access
#define rmtstat		stat
#define rmtcreat	creat
#define rmtlstat	lstat
#define rmtread		read
#define rmtwrite	write
#define rmtlseek	lseek
#define rmtclose	close
#define rmtioctl	ioctl
#define rmtdup		dup
#define rmtfstat	fstat
#define rmtfcntl	fcntl
#define rmtisatty	isatty

#else /* !NO_REMOTE */

/* Descriptors at or above this value denote remote connections; the
   value is passed to __rmt_open as its BIAS argument and subtracted
   again before calling the other __rmt_XXX routines.  */
#define __REM_BIAS	128
#define RMTIOCTL

#ifndef O_CREAT
#define O_CREAT	01000
#endif

/* Scratch variable assigned by _remdev.  */
extern char *__rmt_path;

#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
#include <string.h>
#ifndef index
#define index strchr
#endif
#else
extern char *index ();
#endif

/* Nonzero if PATH contains a colon and f_force_local is not set.
   Side effect: stores the address of the colon in __rmt_path.  */
#define _remdev(path)	(!f_force_local && (__rmt_path = index ((path), ':')))

/* Nonzero if FD is a biased remote descriptor.  */
#define _isrmt(fd)		((fd) >= __REM_BIAS)

#define rmtopen(path,oflag,mode) (_remdev(path) ? __rmt_open((path), (oflag), (mode), __REM_BIAS) : open((path), (oflag), (mode)))
#define rmtaccess(path, amode)	(_remdev(path) ? 0 : access((path), (amode)))
#define rmtstat(path, buf)	(_remdev(path) ? (errno = EOPNOTSUPP), -1 : stat((path), (buf)))
/* The literal 1 is the open flag passed to the remote side together
   with O_CREAT.  */
#define rmtcreat(path, mode)	(_remdev(path) ? __rmt_open ((path), 1 | O_CREAT, (mode), __REM_BIAS) : creat((path), (mode)))
#define rmtlstat(path,buf)	(_remdev(path) ? (errno = EOPNOTSUPP), -1 : lstat((path), (buf)))

#define rmtread(fd, buf, n)	(_isrmt(fd) ? __rmt_read((fd) - __REM_BIAS, (buf), (n)) : read((fd), (buf), (n)))
#define rmtwrite(fd, buf, n)	(_isrmt(fd) ? __rmt_write((fd) - __REM_BIAS, (buf), (n)) : write((fd), (buf), (n)))
#define rmtlseek(fd, off, wh)	(_isrmt(fd) ? __rmt_lseek((fd) - __REM_BIAS, (off), (wh)) : lseek((fd), (off), (wh)))
#define rmtclose(fd)		(_isrmt(fd) ? __rmt_close((fd) - __REM_BIAS) : close(fd))
#ifdef RMTIOCTL
#define rmtioctl(fd,req,arg)	(_isrmt(fd) ? __rmt_ioctl((fd) - __REM_BIAS, (req), (arg)) : ioctl((fd), (req), (arg)))
#else
#define rmtioctl(fd,req,arg)	(_isrmt(fd) ? (errno = EOPNOTSUPP), -1 : ioctl((fd), (req), (arg)))
#endif
#define rmtdup(fd)		(_isrmt(fd) ? (errno = EOPNOTSUPP), -1 : dup(fd))
#define rmtfstat(fd, buf)	(_isrmt(fd) ? (errno = EOPNOTSUPP), -1 : fstat((fd), (buf)))
#define rmtfcntl(fd,cmd,arg)	(_isrmt(fd) ? (errno = EOPNOTSUPP), -1 : fcntl ((fd), (cmd), (arg)))
#define rmtisatty(fd)		(_isrmt(fd) ? 0 : isatty(fd))

#undef RMTIOCTL

int __rmt_open ();
int __rmt_close ();
int __rmt_read ();
int __rmt_write ();
long __rmt_lseek ();
int __rmt_ioctl ();
#endif /* !NO_REMOTE */
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* The man page rmt(8) for /etc/rmt documents the remote mag tape + protocol which rdump and rrestore use. Unfortunately, the man + page is *WRONG*. The author of the routines I'm including originally + wrote his code just based on the man page, and it didn't work, so he + went to the rdump source to figure out why. The only thing he had to + change was to check for the 'F' return code in addition to the 'E', + and to separate the various arguments with \n instead of a space. I + personally don't think that this is much of a problem, but I wanted to + point it out. -- Arnold Robbins + + Originally written by Jeff Lee, modified some by Arnold Robbins. + Redone as a library that can replace open, read, write, etc., by + Fred Fish, with some additional work by Arnold Robbins. + Modified to make all rmtXXX calls into macros for speed by Jay Fenlason. + Use -DHAVE_NETDB_H for rexec code, courtesy of Dan Kegel, srs!dan. */ + +#include <stdio.h> +#include <sys/types.h> +#include <signal.h> + +#ifdef HAVE_SYS_MTIO_H +#include <sys/ioctl.h> +#include <sys/mtio.h> +#endif + +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif + +#include <errno.h> +#include <setjmp.h> +#include <sys/stat.h> + +#ifndef errno +extern int errno; +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef STDC_HEADERS +#include <string.h> +#include <stdlib.h> +#endif + +/* Maximum size of a fully qualified host name. */ +#define MAXHOSTLEN 257 + +/* Size of buffers for reading and writing commands to rmt. + (An arbitrary limit.) 
/* Size of buffers for reading and writing commands to rmt.
   (An arbitrary limit.)  */
#define CMDBUFSIZE 64

/* Maximum size of a fully qualified host name.  This limit is normally
   defined with the other limits earlier in the file; the guard only
   keeps this section usable on its own.  */
#ifndef MAXHOSTLEN
#define MAXHOSTLEN 257
#endif

#ifndef RETSIGTYPE
#define RETSIGTYPE void
#endif

/* Error code reported when a host, user or device name does not fit
   in its fixed-size buffer.  */
#ifdef ENAMETOOLONG
#define RMT_ENAMETOOLONG ENAMETOOLONG
#else
#define RMT_ENAMETOOLONG EINVAL
#endif

/* Maximum number of simultaneous remote tape connections.
   (Another arbitrary limit.)  The initializers of from_rmt and to_rmt
   below must list exactly this many pairs.  */
#define MAXUNIT	4

/* Return the parent's read side of remote tape connection FILDES.  */
#define READ(fildes) (from_rmt[fildes][0])

/* Return the parent's write side of remote tape connection FILDES.  */
#define WRITE(fildes) (to_rmt[fildes][1])

/* The pipes for receiving data from remote tape drives.
   A connection slot is free when both READ and WRITE are -1.  */
static int from_rmt[MAXUNIT][2] =
{{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}};

/* The pipes for sending data to remote tape drives.  */
static int to_rmt[MAXUNIT][2] =
{{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}};

/* Temporary variable used by macros in rmt.h.  */
char *__rmt_path;

/* Close remote tape connection FILDES and mark its slot free.  */

static void
_rmt_shutdown (fildes)
     int fildes;
{
  close (READ (fildes));
  close (WRITE (fildes));
  READ (fildes) = -1;
  WRITE (fildes) = -1;
}

/* Attempt to perform the remote tape command specified in BUF
   on remote tape connection FILDES.
   Return 0 if successful, -1 on error.  On error the connection is
   shut down and errno is set to EIO.  */

static int
command (fildes, buf)
     int fildes;
     char *buf;
{
  register int buflen;
  RETSIGTYPE (*pipe_handler) ();

  /* Save the current pipe handler and try to make the request.
     SIGPIPE is ignored for the duration of the write so that a dead
     peer shows up as a failed write instead of killing the process.  */

  pipe_handler = signal (SIGPIPE, SIG_IGN);
  buflen = strlen (buf);
  if (write (WRITE (fildes), buf, buflen) == buflen)
    {
      signal (SIGPIPE, pipe_handler);
      return 0;
    }

  /* Something went wrong.  Close down and go home.  */

  signal (SIGPIPE, pipe_handler);
  _rmt_shutdown (fildes);
  errno = EIO;
  return -1;
}

/* Read and return the status from remote tape connection FILDES.
   If an error occurred, return -1 and set errno.

   A reply is one line: `A<number>' for success, or `E<errno>' or
   `F<errno>' for failure, the latter two followed by a message line
   that is read and discarded.  An `F' reply, a short read, an
   over-long line or an unrecognized reply shuts the connection down;
   an `E' reply leaves it open.  */

static int
status (fildes)
     int fildes;
{
  int i;
  char c, *cp;
  char buffer[CMDBUFSIZE];

  /* Read the reply command line, one byte at a time so that nothing
     belonging to a following data block is consumed.  */

  for (i = 0, cp = buffer; i < CMDBUFSIZE; i++, cp++)
    {
      if (read (READ (fildes), cp, 1) != 1)
	{
	  _rmt_shutdown (fildes);
	  errno = EIO;
	  return -1;
	}
      if (*cp == '\n')
	{
	  *cp = '\0';
	  break;
	}
    }

  /* No newline within CMDBUFSIZE bytes: the streams are out of step.  */
  if (i == CMDBUFSIZE)
    {
      _rmt_shutdown (fildes);
      errno = EIO;
      return -1;
    }

  /* Check the return status, skipping leading blanks.  */

  for (cp = buffer; *cp; cp++)
    if (*cp != ' ')
      break;

  if (*cp == 'E' || *cp == 'F')
    {
      errno = atoi (cp + 1);
      /* Skip the error message line.  */
      while (read (READ (fildes), &c, 1) == 1)
	if (c == '\n')
	  break;

      if (*cp == 'F')
	_rmt_shutdown (fildes);

      return -1;
    }

  /* Check for mis-synced pipes.  */

  if (*cp != 'A')
    {
      _rmt_shutdown (fildes);
      errno = EIO;
      return -1;
    }

  /* Got an `A' (success) response.  */
  return atoi (cp + 1);
}

#ifdef HAVE_NETDB_H
/* Execute /etc/rmt as user USER on remote system HOST using rexec.
   Return a file descriptor of a bidirectional socket for stdin and stdout.
   If USER is NULL, or an empty string, use the current username.

   By default, this code is not used, since it requires that
   the user have a .netrc file in his/her home directory, or that the
   application designer be willing to have rexec prompt for login and
   password info.  This may be unacceptable, and .rhosts files for use
   with rsh are much more common on BSD systems.  */

static int
_rmt_rexec (host, user)
     char *host;
     char *user;
{
  struct servent *rexecserv;
  int save_stdin = dup (fileno (stdin));
  int save_stdout = dup (fileno (stdout));
  int tape_fd;			/* Return value. */

  /* When using cpio -o < filename, stdin is no longer the tty.
     But the rexec subroutine reads the login and the passwd on stdin,
     to allow remote execution of the command.
     So, reopen stdin and stdout on /dev/tty before the rexec and
     give them back their original value after.  */
  if (freopen ("/dev/tty", "r", stdin) == NULL)
    freopen ("/dev/null", "r", stdin);
  if (freopen ("/dev/tty", "w", stdout) == NULL)
    freopen ("/dev/null", "w", stdout);

  rexecserv = getservbyname ("exec", "tcp");
  if (NULL == rexecserv)
    {
      fprintf (stderr, "exec/tcp: service not available\n");
      exit (1);
    }
  if (user != NULL && *user == '\0')
    user = NULL;
  tape_fd = rexec (&host, rexecserv->s_port, user, NULL,
		   "/etc/rmt", (int *) NULL);

  /* NOTE(review): the streams returned by fdopen are discarded, so it
     is not evident that stdin and stdout are usable again after this
     point.  Left as it was; confirm before relying on it.  */
  fclose (stdin);
  fdopen (save_stdin, "r");
  fclose (stdout);
  fdopen (save_stdout, "w");

  return tape_fd;
}

#endif /* HAVE_NETDB_H */

/* Copy characters from *SRC into DST until one of the characters in
   STOPS is seen, then NUL-terminate DST and leave *SRC pointing at
   that delimiter.  Return 0 on success.  Return -1 with errno set to
   RMT_ENAMETOOLONG if the field does not fit in DSTSIZE bytes
   (terminator included), or to EINVAL if the string ends before any
   delimiter is found.  */

static int
copy_field (dst, dstsize, src, stops)
     char *dst;
     int dstsize;
     char **src;
     char *stops;
{
  char *p = *src;
  char *s;
  int n = 0;

  for (; *p != '\0'; p++)
    {
      for (s = stops; *s != '\0'; s++)
	if (*s == *p)
	  break;
      if (*s != '\0')
	{
	  dst[n] = '\0';
	  *src = p;
	  return 0;
	}
      if (n >= dstsize - 1)
	{
	  errno = RMT_ENAMETOOLONG;
	  return -1;
	}
      dst[n++] = *p;
    }

  errno = EINVAL;
  return -1;
}

#ifndef HAVE_NETDB_H
/* Remote shell programs to try, in order, each with the argv[0] it
   is started under.  */
static struct rsh_program
{
  char *path;
  char *name;
} rsh_programs[] =
{
  {"/usr/ucb/rsh", "rsh"},
  {"/usr/bin/remsh", "remsh"},
  {"/usr/bin/rsh", "rsh"},
  {"/usr/bsd/rsh", "rsh"},
  {"/usr/bin/nsh", "nsh"},
  {0, 0}
};

/* Close both ends of the freshly created pipe FDS and mark them
   unused, leaving errno as it was.  */

static void
close_pair (fds)
     int *fds;
{
  int saved_errno = errno;

  close (fds[0]);
  close (fds[1]);
  fds[0] = fds[1] = -1;
  errno = saved_errno;
}
#endif /* !HAVE_NETDB_H */

/* Open a magtape device on the system specified in PATH, as the given user.
   PATH has the form `[user@]system:/dev/????'.
   If COMPAT is defined, it can also have the form `system[.user]:/dev/????'.

   OFLAG is O_RDONLY, O_WRONLY, etc.
   MODE is ignored; it is not sent to the remote side.

   If successful, return the remote tape pipe number plus BIAS.
   On error, return -1 with errno set: EMFILE if all MAXUNIT
   connections are in use, EINVAL if PATH has no `:' separator,
   RMT_ENAMETOOLONG if a component of PATH does not fit its buffer,
   otherwise whatever the failing step reported.  A connection that
   was started but could not open the device is shut down again, so
   its slot can be reused.  */

int
__rmt_open (path, oflag, mode, bias)
     char *path;
     int oflag;
     int mode;
     int bias;
{
  int i, rc;
  int saved_errno;
  /* Command buffer: `O', the device name, a newline, OFLAG in
     decimal, a newline and the terminator.  */
  char buffer[CMDBUFSIZE + 16];
  char host[MAXHOSTLEN];	/* The remote host name.  */
  char device[CMDBUFSIZE];	/* The remote device name.  */
  char login[CMDBUFSIZE];	/* The remote user name.  */

  /* Find an unused pair of file descriptors.  */

  for (i = 0; i < MAXUNIT; i++)
    if (READ (i) == -1 && WRITE (i) == -1)
      break;

  if (i == MAXUNIT)
    {
      errno = EMFILE;
      return -1;
    }

  /* Pull apart the system and device, and optional user.
     Don't munge the original string.  */

  login[0] = '\0';
#ifdef COMPAT
  if (copy_field (host, (int) sizeof host, &path, "@.:") == -1)
    return -1;
#else
  if (copy_field (host, (int) sizeof host, &path, "@:") == -1)
    return -1;
#endif

  if (*path == '@')
    {
      /* Saw user part of user@host.  Start over.  */
      if (strlen (host) >= sizeof login)
	{
	  errno = RMT_ENAMETOOLONG;
	  return -1;
	}
      strcpy (login, host);
      path++;
      if (copy_field (host, (int) sizeof host, &path, ":") == -1)
	return -1;
    }
#ifdef COMPAT
  else if (*path == '.')
    {
      path++;
      if (copy_field (login, (int) sizeof login, &path, ":") == -1)
	return -1;
    }
#endif

  /* PATH now points at the `:'; the rest is the device name.  */
  path++;
  if (strlen (path) >= sizeof device)
    {
      errno = RMT_ENAMETOOLONG;
      return -1;
    }
  strcpy (device, path);

#ifdef HAVE_NETDB_H
  /* Execute the remote command using rexec.  */
  READ (i) = WRITE (i) = _rmt_rexec (host, login);
  if (READ (i) < 0)
    {
      READ (i) = WRITE (i) = -1;
      return -1;
    }
#else /* !HAVE_NETDB_H */
  /* Set up the pipes for the `rsh' command, and fork.  */

  if (pipe (to_rmt[i]) == -1)
    return -1;
  if (pipe (from_rmt[i]) == -1)
    {
      close_pair (to_rmt[i]);
      return -1;
    }

  rc = fork ();
  if (rc == -1)
    {
      close_pair (to_rmt[i]);
      close_pair (from_rmt[i]);
      return -1;
    }

  if (rc == 0)
    {
      /* Child.  Make the pipes its standard input and output.  */
      int k;

      close (0);
      dup (to_rmt[i][0]);
      close (to_rmt[i][0]);
      close (to_rmt[i][1]);

      close (1);
      dup (from_rmt[i][1]);
      close (from_rmt[i][0]);
      close (from_rmt[i][1]);

      /* Give up the group first: once the user ID has been dropped
	 the process may no longer be allowed to change its group.  */
      setgid (getgid ());
      setuid (getuid ());

      for (k = 0; rsh_programs[k].path != 0; k++)
	{
	  if (*login)
	    execl (rsh_programs[k].path, rsh_programs[k].name, host,
		   "-l", login, "/etc/rmt", (char *) 0);
	  else
	    execl (rsh_programs[k].path, rsh_programs[k].name, host,
		   "/etc/rmt", (char *) 0);
	}

      /* Bad problems if we get here.  */

      perror ("cannot execute remote shell");
      _exit (1);
    }

  /* Parent.  Drop the ends that belong to the child.  */
  close (to_rmt[i][0]);
  close (from_rmt[i][1]);
  to_rmt[i][0] = -1;
  from_rmt[i][1] = -1;
#endif /* !HAVE_NETDB_H */

  /* Attempt to open the tape device.  */

  sprintf (buffer, "O%s\n%d\n", device, oflag);
  if (command (i, buffer) == -1 || status (i) == -1)
    {
      /* The caller gets no handle for this connection, so release it
	 here unless command or status already did.  */
      saved_errno = errno;
      if (READ (i) != -1 || WRITE (i) != -1)
	_rmt_shutdown (i);
      errno = saved_errno;
      return -1;
    }

  return i + bias;
}

/* Close remote tape connection FILDES and shut down.
   Return 0 if successful, -1 on error.  */

int
__rmt_close (fildes)
     int fildes;
{
  int rc;

  if (command (fildes, "C\n") == -1)
    return -1;

  rc = status (fildes);
  _rmt_shutdown (fildes);
  return rc;
}

/* Read up to NBYTE bytes into BUF from remote tape connection FILDES.
   Return the number of bytes read on success, -1 on error.

   The remote side announces how many bytes follow.  An announcement
   larger than NBYTE, or a failed or short transfer of the announced
   data, shuts the connection down and fails with errno set to EIO.  */

int
__rmt_read (fildes, buf, nbyte)
     int fildes;
     char *buf;
     unsigned int nbyte;
{
  int rc, i;
  int got;			/* Result of one read; may be -1.  */
  char buffer[CMDBUFSIZE];

  sprintf (buffer, "R%u\n", nbyte);
  if (command (fildes, buffer) == -1 || (rc = status (fildes)) == -1)
    return -1;

  /* Never accept more than the caller's buffer can hold.  A negative
     count also lands here because of the conversion to unsigned.  */
  if ((unsigned int) rc > nbyte)
    {
      _rmt_shutdown (fildes);
      errno = EIO;
      return -1;
    }

  for (i = 0; i < rc; i += got, buf += got)
    {
      got = read (READ (fildes), buf, rc - i);
      if (got <= 0)
	{
	  _rmt_shutdown (fildes);
	  errno = EIO;
	  return -1;
	}
    }

  return rc;
}

/* Write NBYTE bytes from BUF to remote tape connection FILDES.
   Return the number of bytes written on success, -1 on error.  */

int
__rmt_write (fildes, buf, nbyte)
     int fildes;
     char *buf;
     unsigned int nbyte;
{
  int written;
  char buffer[CMDBUFSIZE];
  RETSIGTYPE (*pipe_handler) ();

  sprintf (buffer, "W%u\n", nbyte);
  if (command (fildes, buffer) == -1)
    return -1;

  pipe_handler = signal (SIGPIPE, SIG_IGN);
  written = write (WRITE (fildes), buf, nbyte);
  signal (SIGPIPE, pipe_handler);

  if (written >= 0 && (unsigned int) written == nbyte)
    return status (fildes);

  /* Write error.  */
  _rmt_shutdown (fildes);
  errno = EIO;
  return -1;
}

/* Perform an imitation lseek operation on remote tape connection FILDES.
   Return the new file offset if successful, -1 on error.  */

long
__rmt_lseek (fildes, offset, whence)
     int fildes;
     long offset;
     int whence;
{
  char buffer[CMDBUFSIZE];

  sprintf (buffer, "L%ld\n%d\n", offset, whence);
  if (command (fildes, buffer) == -1)
    return -1;

  return status (fildes);
}

/* Perform a raw tape operation on remote tape connection FILDES.
   Return the results of the ioctl, or -1 on error.  */

#ifdef MTIOCTOP
int
__rmt_ioctl (fildes, op, arg)
     int fildes, op;
     char *arg;
{
  char c;
  char *p;			/* Where the next status bytes go.  */
  int rc, cnt;
  int total;			/* Size of the status block received.  */
  char buffer[CMDBUFSIZE];

  switch (op)
    {
    default:
      errno = EINVAL;
      return -1;

    case MTIOCTOP:
      /* MTIOCTOP is the easy one.  Nothing is transferred in binary.  */
      sprintf (buffer, "I%d\n%d\n", ((struct mtop *) arg)->mt_op,
	       ((struct mtop *) arg)->mt_count);
      if (command (fildes, buffer) == -1)
	return -1;
      return status (fildes);	/* Return the count.  */

    case MTIOCGET:
      /* Grab the status and read it directly into the structure.
         This assumes that the status buffer is not padded
         and that 2 shorts fit in a long without any word
         alignment problems; i.e., the whole struct is contiguous.
         NOTE - this is probably NOT a good assumption.  */

      if (command (fildes, "S") == -1 || (rc = status (fildes)) == -1)
	return -1;

      /* A block larger than the structure would overrun the caller's
	 buffer, and it cannot be skipped without losing step.  */
      if (rc > (int) sizeof (struct mtget))
	{
	  _rmt_shutdown (fildes);
	  errno = EIO;
	  return -1;
	}

      /* Fill through P so that ARG keeps pointing at the start of the
	 structure, and remember the size for the byte swap below.  */
      total = rc;
      for (p = arg; rc > 0; rc -= cnt, p += cnt)
	{
	  cnt = read (READ (fildes), p, rc);
	  if (cnt <= 0)
	    {
	      _rmt_shutdown (fildes);
	      errno = EIO;
	      return -1;
	    }
	}

      /* Check for byte position. mt_type is a small integer field
         (normally) so we will check its magnitude. If it is larger than
         256, we will assume that the bytes are swapped and go through
         and reverse all the bytes.  */

      if (((struct mtget *) arg)->mt_type < 256)
	return 0;

      for (cnt = 0; cnt + 1 < total; cnt += 2)
	{
	  c = arg[cnt];
	  arg[cnt] = arg[cnt + 1];
	  arg[cnt + 1] = c;
	}

      return 0;
    }
}

#endif
+ */ +#define intconv atoi +PTR ck_malloc (); +PTR ck_realloc (); +extern int getoldopt (); +extern void read_and (); +extern void list_archive (); +extern void extract_archive (); +extern void diff_archive (); +extern void create_archive (); +extern void update_archive (); +extern void junk_archive (); +extern void init_volume_number (); +extern void closeout_volume_number (); + +/* JF */ +extern time_t get_date (); + +time_t new_time; + +static FILE *namef; /* File to read names from */ +static char **n_argv; /* Argv used by name routines */ +static int n_argc; /* Argc used by name routines */ +static char **n_ind; /* Store an array of names */ +static int n_indalloc; /* How big is the array? */ +static int n_indused; /* How many entries does it have? */ +static int n_indscan; /* How many of the entries have we scanned? */ + + +extern FILE *msg_file; + +int check_exclude (); +void add_exclude (); +void add_exclude_file (); +void addname (); +void describe (); +void diff_init (); +void extr_init (); +int is_regex (); +void name_add (); +void name_init (); +void options (); +char *un_quote_string (); + +#ifndef S_ISLNK +#define lstat stat +#endif + +#ifndef DEFBLOCKING +#define DEFBLOCKING 20 +#endif + +#ifndef DEF_AR_FILE +#define DEF_AR_FILE "tar.out" +#endif + +/* For long options that unconditionally set a single flag, we have getopt + do it. For the others, we share the code for the equivalent short + named option, the name of which is stored in the otherwise-unused `val' + field of the `struct option'; for long options that have no equivalent + short option, we use nongraphic characters as pseudo short option + characters, starting (for no particular reason) with character 10. 
*/ + +struct option long_options[] = +{ + {"create", 0, 0, 'c'}, + {"append", 0, 0, 'r'}, + {"extract", 0, 0, 'x'}, + {"get", 0, 0, 'x'}, + {"list", 0, 0, 't'}, + {"update", 0, 0, 'u'}, + {"catenate", 0, 0, 'A'}, + {"concatenate", 0, 0, 'A'}, + {"compare", 0, 0, 'd'}, + {"diff", 0, 0, 'd'}, + {"delete", 0, 0, 14}, + {"help", 0, 0, 12}, + + {"null", 0, 0, 16}, + {"directory", 1, 0, 'C'}, + {"record-number", 0, &f_sayblock, 1}, + {"files-from", 1, 0, 'T'}, + {"label", 1, 0, 'V'}, + {"exclude-from", 1, 0, 'X'}, + {"exclude", 1, 0, 15}, + {"file", 1, 0, 'f'}, + {"block-size", 1, 0, 'b'}, + {"version", 0, 0, 11}, + {"verbose", 0, 0, 'v'}, + {"totals", 0, &f_totals, 1}, + + {"read-full-blocks", 0, &f_reblock, 1}, + {"starting-file", 1, 0, 'K'}, + {"to-stdout", 0, &f_exstdout, 1}, + {"ignore-zeros", 0, &f_ignorez, 1}, + {"keep-old-files", 0, 0, 'k'}, + {"same-permissions", 0, &f_use_protection, 1}, + {"preserve-permissions", 0, &f_use_protection, 1}, + {"modification-time", 0, &f_modified, 1}, + {"preserve", 0, 0, 10}, + {"same-order", 0, &f_sorted_names, 1}, + {"same-owner", 0, &f_do_chown, 1}, + {"preserve-order", 0, &f_sorted_names, 1}, + + {"newer", 1, 0, 'N'}, + {"after-date", 1, 0, 'N'}, + {"newer-mtime", 1, 0, 13}, + {"incremental", 0, 0, 'G'}, + {"listed-incremental", 1, 0, 'g'}, + {"multi-volume", 0, &f_multivol, 1}, + {"info-script", 1, 0, 'F'}, + {"new-volume-script", 1, 0, 'F'}, + {"absolute-paths", 0, &f_absolute_paths, 1}, + {"interactive", 0, &f_confirm, 1}, + {"confirmation", 0, &f_confirm, 1}, + + {"verify", 0, &f_verify, 1}, + {"dereference", 0, &f_follow_links, 1}, + {"one-file-system", 0, &f_local_filesys, 1}, + {"old-archive", 0, 0, 'o'}, + {"portability", 0, 0, 'o'}, + {"compress", 0, 0, 'Z'}, + {"uncompress", 0, 0, 'Z'}, + {"block-compress", 0, &f_compress_block, 1}, + {"gzip", 0, 0, 'z'}, + {"ungzip", 0, 0, 'z'}, + {"use-compress-program", 1, 0, 18}, + + + {"same-permissions", 0, &f_use_protection, 1}, + {"sparse", 0, &f_sparse_files, 1}, + 
{"tape-length", 1, 0, 'L'}, + {"remove-files", 0, &f_remove_files, 1}, + {"ignore-failed-read", 0, &f_ignore_failed_read, 1}, + {"checkpoint", 0, &f_checkpoint, 1}, + {"show-omitted-dirs", 0, &f_show_omitted_dirs, 1}, + {"volno-file", 1, 0, 17}, + {"force-local", 0, &f_force_local, 1}, + {"atime-preserve", 0, &f_atime_preserve, 1}, + + {0, 0, 0, 0} +}; + +/* + * Main routine for tar. + */ +void +main (argc, argv) + int argc; + char **argv; +{ + extern char version_string[]; + + tar = argv[0]; /* JF: was "tar" Set program name */ + filename_terminator = '\n'; + errors = 0; + + options (argc, argv); + + if (!n_argv) + name_init (argc, argv); + + if (f_volno_file) + init_volume_number (); + + switch (cmd_mode) + { + case CMD_CAT: + case CMD_UPDATE: + case CMD_APPEND: + update_archive (); + break; + case CMD_DELETE: + junk_archive (); + break; + case CMD_CREATE: + create_archive (); + if (f_totals) + fprintf (stderr, "Total bytes written: %d\n", tot_written); + break; + case CMD_EXTRACT: + if (f_volhdr) + { + const char *err; + label_pattern = (struct re_pattern_buffer *) + ck_malloc (sizeof *label_pattern); + err = re_compile_pattern (f_volhdr, strlen (f_volhdr), + label_pattern); + if (err) + { + fprintf (stderr, "Bad regular expression: %s\n", + err); + errors++; + break; + } + + } + extr_init (); + read_and (extract_archive); + break; + case CMD_LIST: + if (f_volhdr) + { + const char *err; + label_pattern = (struct re_pattern_buffer *) + ck_malloc (sizeof *label_pattern); + err = re_compile_pattern (f_volhdr, strlen (f_volhdr), + label_pattern); + if (err) + { + fprintf (stderr, "Bad regular expression: %s\n", + err); + errors++; + break; + } + } + read_and (list_archive); +#if 0 + if (!errors) + errors = different; +#endif + break; + case CMD_DIFF: + diff_init (); + read_and (diff_archive); + break; + case CMD_VERSION: + fprintf (stderr, "%s\n", version_string); + break; + case CMD_NONE: + msg ("you must specify exactly one of the r, c, t, x, or d options\n"); + 
fprintf (stderr, "For more information, type ``%s --help''.\n", tar); + exit (EX_ARGSBAD); + } + if (f_volno_file) + closeout_volume_number (); + exit (errors); + /* NOTREACHED */ +} + + +/* + * Parse the options for tar. + */ +void +options (argc, argv) + int argc; + char **argv; +{ + register int c; /* Option letter */ + int ind = -1; + + /* Set default option values */ + blocking = DEFBLOCKING; /* From Makefile */ + ar_files = (char **) ck_malloc (sizeof (char *) * 10); + ar_files_len = 10; + n_ar_files = 0; + cur_ar_file = 0; + + /* Parse options */ + while ((c = getoldopt (argc, argv, + "-01234567Ab:BcC:df:F:g:GhikK:lL:mMN:oOpPrRsStT:uvV:wWxX:zZ", + long_options, &ind)) != EOF) + { + switch (c) + { + case 0: /* long options that set a single flag */ + break; + case 1: + /* File name or non-parsed option */ + name_add (optarg); + break; + case 'C': + name_add ("-C"); + name_add (optarg); + break; + case 10: /* preserve */ + f_use_protection = f_sorted_names = 1; + break; + case 11: + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_VERSION; + break; + case 12: /* help */ + printf ("This is GNU tar, the tape archiving program.\n"); + describe (); + exit (1); + case 13: + f_new_files++; + goto get_newer; + + case 14: /* Delete in the archive */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_DELETE; + break; + + case 15: + f_exclude++; + add_exclude (optarg); + break; + + case 16: /* -T reads null terminated filenames. 
*/ + filename_terminator = '\0'; + break; + + case 17: + f_volno_file = optarg; + break; + + case 18: + if (f_compressprog) + { + msg ("Only one compression option permitted\n"); + exit (EX_ARGSBAD); + } + f_compressprog = optarg; + break; + + case 'g': /* We are making a GNU dump; save + directories at the beginning of + the archive, and include in each + directory its contents */ + if (f_oldarch) + goto badopt; + f_gnudump++; + gnu_dumpfile = optarg; + break; + + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + /* JF this'll have to be modified for other + systems, of course! */ + int d, add; + static char buf[50]; + + d = getoldopt (argc, argv, "lmh"); +#ifdef MAYBEDEF + sprintf (buf, "/dev/rmt/%d%c", c, d); +#else +#ifndef LOW_NUM +#define LOW_NUM 0 +#define MID_NUM 8 +#define HGH_NUM 16 +#endif + if (d == 'l') + add = LOW_NUM; + else if (d == 'm') + add = MID_NUM; + else if (d == 'h') + add = HGH_NUM; + else + goto badopt; + + sprintf (buf, "/dev/rmt%d", add + c - '0'); +#endif + if (n_ar_files == ar_files_len) + ar_files + = (char **) + ck_malloc (sizeof (char *) + * (ar_files_len *= 2)); + ar_files[n_ar_files++] = buf; + } + break; + + case 'A': /* Arguments are tar files, + just cat them onto the end + of the archive. 
*/ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_CAT; + break; + + case 'b': /* Set blocking factor */ + blocking = intconv (optarg); + break; + + case 'B': /* Try to reblock input */ + f_reblock++; /* For reading 4.2BSD pipes */ + break; + + case 'c': /* Create an archive */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_CREATE; + break; + +#if 0 + case 'C': + if (chdir (optarg) < 0) + msg_perror ("Can't change directory to %d", optarg); + break; +#endif + + case 'd': /* Find difference tape/disk */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_DIFF; + break; + + case 'f': /* Use ar_file for the archive */ + if (n_ar_files == ar_files_len) + ar_files + = (char **) ck_malloc (sizeof (char *) + * (ar_files_len *= 2)); + + ar_files[n_ar_files++] = optarg; + break; + + case 'F': + /* Since -F is only useful with -M , make it implied */ + f_run_script_at_end++;/* run this script at the end */ + info_script = optarg; /* of each tape */ + f_multivol++; + break; + + case 'G': /* We are making a GNU dump; save + directories at the beginning of + the archive, and include in each + directory its contents */ + if (f_oldarch) + goto badopt; + f_gnudump++; + gnu_dumpfile = 0; + break; + + case 'h': + f_follow_links++; /* follow symbolic links */ + break; + + case 'i': + f_ignorez++; /* Ignore zero records (eofs) */ + /* + * This can't be the default, because Unix tar + * writes two records of zeros, then pads out the + * block with garbage. + */ + break; + + case 'k': /* Don't overwrite files */ +#ifdef NO_OPEN3 + msg ("can't keep old files on this system"); + exit (EX_ARGSBAD); +#else + f_keep++; +#endif + break; + + case 'K': + f_startfile++; + addname (optarg); + break; + + case 'l': /* When dumping directories, don't + dump files/subdirectories that are + on other filesystems. 
*/ + f_local_filesys++; + break; + + case 'L': + tape_length = intconv (optarg); + f_multivol++; + break; + case 'm': + f_modified++; + break; + + case 'M': /* Make Multivolume archive: + When we can't write any more + into the archive, re-open it, + and continue writing */ + f_multivol++; + break; + + case 'N': /* Only write files newer than X */ + get_newer: + f_new_files++; + new_time = get_date (optarg, (PTR) 0); + if (new_time == (time_t) - 1) + { + msg ("invalid date format `%s'", optarg); + exit (EX_ARGSBAD); + } + break; + + case 'o': /* Generate old archive */ + if (f_gnudump /* || f_dironly */ ) + goto badopt; + f_oldarch++; + break; + + case 'O': + f_exstdout++; + break; + + case 'p': + f_use_protection++; + break; + + case 'P': + f_absolute_paths++; + break; + + case 'r': /* Append files to the archive */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_APPEND; + break; + + case 'R': + f_sayblock++; /* Print block #s for debug */ + break; /* of bad tar archives */ + + case 's': + f_sorted_names++; /* Names to extr are sorted */ + break; + + case 'S': /* deal with sparse files */ + f_sparse_files++; + break; + case 't': + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_LIST; + f_verbose++; /* "t" output == "cv" or "xv" */ + break; + + case 'T': + name_file = optarg; + f_namefile++; + break; + + case 'u': /* Append files to the archive that + aren't there, or are newer than the + copy in the archive */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_UPDATE; + break; + + case 'v': + f_verbose++; + break; + + case 'V': + f_volhdr = optarg; + break; + + case 'w': + f_confirm++; + break; + + case 'W': + f_verify++; + break; + + case 'x': /* Extract files from the archive */ + if (cmd_mode != CMD_NONE) + goto badopt; + cmd_mode = CMD_EXTRACT; + break; + + case 'X': + f_exclude++; + add_exclude_file (optarg); + break; + + case 'z': + if (f_compressprog) + { + msg ("Only one compression option permitted\n"); + exit (EX_ARGSBAD); 
+ } + f_compressprog = "gzip"; + break; + + case 'Z': + if (f_compressprog) + { + msg ("Only one compression option permitted\n"); + exit (EX_ARGSBAD); + } + f_compressprog = "compress"; + break; + + case '?': + badopt: + msg ("Unknown option. Use '%s --help' for a complete list of options.", tar); + exit (EX_ARGSBAD); + + } + } + + blocksize = blocking * RECORDSIZE; + if (n_ar_files == 0) + { + n_ar_files = 1; + ar_files[0] = getenv ("TAPE"); /* From environment, or */ + if (ar_files[0] == 0) + ar_files[0] = DEF_AR_FILE; /* From Makefile */ + } + if (n_ar_files > 1 && !f_multivol) + { + msg ("Multiple archive files requires --multi-volume\n"); + exit (EX_ARGSBAD); + } + if (f_compress_block && !f_compressprog) + { + msg ("You must use a compression option (--gzip, --compress\n\ +or --use-compress-program) with --block-compress.\n"); + exit (EX_ARGSBAD); + } +} + + +/* + * Print as much help as the user's gonna get. + * + * We have to sprinkle in the KLUDGE lines because too many compilers + * cannot handle character strings longer than about 512 bytes. Yuk! + * In particular, MS-DOS and Xenix MSC and PDP-11 V7 Unix have this + * problem. 
+ */ +void +describe () +{ + puts ("choose one of the following:"); + fputs ("\ +-A, --catenate,\n\ + --concatenate append tar files to an archive\n\ +-c, --create create a new archive\n\ +-d, --diff,\n\ + --compare find differences between archive and file system\n\ +--delete delete from the archive (not for use on mag tapes!)\n\ +-r, --append append files to the end of an archive\n\ +-t, --list list the contents of an archive\n\ +-u, --update only append files that are newer than copy in archive\n\ +-x, --extract,\n\ + --get extract files from an archive\n", stdout); + + fprintf (stdout, "\ +Other options:\n\ +--atime-preserve don't change access times on dumped files\n\ +-b, --block-size N block size of Nx512 bytes (default N=%d)\n", DEFBLOCKING); + fputs ("\ +-B, --read-full-blocks reblock as we read (for reading 4.2BSD pipes)\n\ +-C, --directory DIR change to directory DIR\n\ +--checkpoint print directory names while reading the archive\n\ +", stdout); /* KLUDGE */ + fprintf (stdout, "\ +-f, --file [HOSTNAME:]F use archive file or device F (default %s)\n", + DEF_AR_FILE); + fputs ("\ +--force-local archive file is local even if has a colon\n\ +-F, --info-script F\n\ + --new-volume-script F run script at end of each tape (implies -M)\n\ +-G, --incremental create/list/extract old GNU-format incremental backup\n\ +-g, --listed-incremental F create/list/extract new GNU-format incremental backup\n\ +-h, --dereference don't dump symlinks; dump the files they point to\n\ +-i, --ignore-zeros ignore blocks of zeros in archive (normally mean EOF)\n\ +--ignore-failed-read don't exit with non-zero status on unreadable files\n\ +-k, --keep-old-files keep existing files; don't overwrite them from archive\n\ +-K, --starting-file F begin at file F in the archive\n\ +-l, --one-file-system stay in local file system when creating an archive\n\ +-L, --tape-length N change tapes after writing N*1024 bytes\n\ +", stdout); /* KLUDGE */ + fputs ("\ +-m, --modification-time don't 
extract file modified time\n\ +-M, --multi-volume create/list/extract multi-volume archive\n\ +-N, --after-date DATE,\n\ + --newer DATE only store files newer than DATE\n\ +-o, --old-archive,\n\ + --portability write a V7 format archive, rather than ANSI format\n\ +-O, --to-stdout extract files to standard output\n\ +-p, --same-permissions,\n\ + --preserve-permissions extract all protection information\n\ +-P, --absolute-paths don't strip leading `/'s from file names\n\ +--preserve like -p -s\n\ +", stdout); /* KLUDGE */ + fputs ("\ +-R, --record-number show record number within archive with each message\n\ +--remove-files remove files after adding them to the archive\n\ +-s, --same-order,\n\ + --preserve-order list of names to extract is sorted to match archive\n\ +--same-owner create extracted files with the same ownership \n\ +-S, --sparse handle sparse files efficiently\n\ +-T, --files-from F get names to extract or create from file F\n\ +--null -T reads null-terminated names, disable -C\n\ +--totals print total bytes written with --create\n\ +-v, --verbose verbosely list files processed\n\ +-V, --label NAME create archive with volume name NAME\n\ +--version print tar program version number\n\ +-w, --interactive,\n\ + --confirmation ask for confirmation for every action\n\ +", stdout); /* KLUDGE */ + fputs ("\ +-W, --verify attempt to verify the archive after writing it\n\ +--exclude FILE exclude file FILE\n\ +-X, --exclude-from FILE exclude files listed in FILE\n\ +-Z, --compress,\n\ + --uncompress filter the archive through compress\n\ +-z, --gzip,\n\ + --ungzip filter the archive through gzip\n\ +--use-compress-program PROG\n\ + filter the archive through PROG (which must accept -d)\n\ +--block-compress block the output of compression program for tapes\n\ +-[0-7][lmh] specify drive and density\n\ +", stdout); +} + +void +name_add (name) + char *name; +{ + if (n_indalloc == n_indused) + { + n_indalloc += 10; + n_ind = (char **) (n_indused ? 
ck_realloc (n_ind, n_indalloc * sizeof (char *)): ck_malloc (n_indalloc * sizeof (char *))); + } + n_ind[n_indused++] = name; +} + +/* + * Set up to gather file names for tar. + * + * They can either come from stdin or from argv. + */ +void +name_init (argc, argv) + int argc; + char **argv; +{ + + if (f_namefile) + { + if (optind < argc) + { + msg ("too many args with -T option"); + exit (EX_ARGSBAD); + } + if (!strcmp (name_file, "-")) + { + namef = stdin; + } + else + { + namef = fopen (name_file, "r"); + if (namef == NULL) + { + msg_perror ("can't open file %s", name_file); + exit (EX_BADFILE); + } + } + } + else + { + /* Get file names from argv, after options. */ + n_argc = argc; + n_argv = argv; + } +} + +/* Read the next filename read from STREAM and null-terminate it. + Put it into BUFFER, reallocating and adjusting *PBUFFER_SIZE if necessary. + Return the new value for BUFFER, or NULL at end of file. */ + +char * +read_name_from_file (buffer, pbuffer_size, stream) + char *buffer; + size_t *pbuffer_size; + FILE *stream; +{ + register int c; + register int indx = 0; + register size_t buffer_size = *pbuffer_size; + + while ((c = getc (stream)) != EOF && c != filename_terminator) + { + if (indx == buffer_size) + { + buffer_size += NAMSIZ; + buffer = ck_realloc (buffer, buffer_size + 2); + } + buffer[indx++] = c; + } + if (indx == 0 && c == EOF) + return NULL; + if (indx == buffer_size) + { + buffer_size += NAMSIZ; + buffer = ck_realloc (buffer, buffer_size + 2); + } + buffer[indx] = '\0'; + *pbuffer_size = buffer_size; + return buffer; +} + +/* + * Get the next name from argv or the name file. + * + * Result is in static storage and can't be relied upon across two calls. + * + * If CHANGE_DIRS is non-zero, treat a filename of the form "-C" as + * meaning that the next filename is the name of a directory to change to. + * If `filename_terminator' is '\0', CHANGE_DIRS is effectively always 0. 
+ */ + +char * +name_next (change_dirs) + int change_dirs; +{ + static char *buffer; /* Holding pattern */ + static int buffer_siz; + register char *p; + register char *q = 0; + register int next_name_is_dir = 0; + extern char *un_quote_string (); + + if (buffer_siz == 0) + { + buffer = ck_malloc (NAMSIZ + 2); + buffer_siz = NAMSIZ; + } + if (filename_terminator == '\0') + change_dirs = 0; +tryagain: + if (namef == NULL) + { + if (n_indscan < n_indused) + p = n_ind[n_indscan++]; + else if (optind < n_argc) + /* Names come from argv, after options */ + p = n_argv[optind++]; + else + { + if (q) + msg ("Missing filename after -C"); + return NULL; + } + + /* JF trivial support for -C option. I don't know if + chdir'ing at this point is dangerous or not. + It seems to work, which is all I ask. */ + if (change_dirs && !q && p[0] == '-' && p[1] == 'C' && p[2] == '\0') + { + q = p; + goto tryagain; + } + if (q) + { + if (chdir (p) < 0) + msg_perror ("Can't chdir to %s", p); + q = 0; + goto tryagain; + } + /* End of JF quick -C hack */ + +#if 0 + if (f_exclude && check_exclude (p)) + goto tryagain; +#endif + return un_quote_string (p); + } + while (p = read_name_from_file (buffer, &buffer_siz, namef)) + { + buffer = p; + if (*p == '\0') + continue; /* Ignore empty lines. */ + q = p + strlen (p) - 1; + while (q > p && *q == '/')/* Zap trailing "/"s. */ + *q-- = '\0'; + if (change_dirs && next_name_is_dir == 0 + && p[0] == '-' && p[1] == 'C' && p[2] == '\0') + { + next_name_is_dir = 1; + goto tryagain; + } + if (next_name_is_dir) + { + if (chdir (p) < 0) + msg_perror ("Can't change to directory %s", p); + next_name_is_dir = 0; + goto tryagain; + } +#if 0 + if (f_exclude && check_exclude (p)) + goto tryagain; +#endif + return un_quote_string (p); + } + return NULL; +} + + +/* + * Close the name file, if any. + */ +void +name_close () +{ + + if (namef != NULL && namef != stdin) + fclose (namef); +} + + +/* + * Gather names in a list for scanning. 
+ * Could hash them later if we really care. + * + * If the names are already sorted to match the archive, we just + * read them one by one. name_gather reads the first one, and it + * is called by name_match as appropriate to read the next ones. + * At EOF, the last name read is just left in the buffer. + * This option lets users of small machines extract an arbitrary + * number of files by doing "tar t" and editing down the list of files. + */ +void +name_gather () +{ + register char *p; + static struct name *namebuf; /* One-name buffer */ + static namelen; + static char *chdir_name; + + if (f_sorted_names) + { + if (!namelen) + { + namelen = NAMSIZ; + namebuf = (struct name *) ck_malloc (sizeof (struct name) + NAMSIZ); + } + p = name_next (0); + if (p) + { + if (*p == '-' && p[1] == 'C' && p[2] == '\0') + { + chdir_name = name_next (0); + p = name_next (0); + if (!p) + { + msg ("Missing file name after -C"); + exit (EX_ARGSBAD); + } + namebuf->change_dir = chdir_name; + } + namebuf->length = strlen (p); + if (namebuf->length >= namelen) + { + namebuf = (struct name *) ck_realloc (namebuf, sizeof (struct name) + namebuf->length); + namelen = namebuf->length; + } + strncpy (namebuf->name, p, namebuf->length); + namebuf->name[namebuf->length] = 0; + namebuf->next = (struct name *) NULL; + namebuf->found = 0; + namelist = namebuf; + namelast = namelist; + } + return; + } + + /* Non sorted names -- read them all in */ + while (p = name_next (0)) + addname (p); +} + +/* + * Add a name to the namelist. 
+ */ +void +addname (name) + char *name; /* pointer to name */ +{ + register int i; /* Length of string */ + register struct name *p; /* Current struct pointer */ + static char *chdir_name; + char *new_name (); + + if (name[0] == '-' && name[1] == 'C' && name[2] == '\0') + { + chdir_name = name_next (0); + name = name_next (0); + if (!chdir_name) + { + msg ("Missing file name after -C"); + exit (EX_ARGSBAD); + } + if (chdir_name[0] != '/') + { + char *path = ck_malloc (PATH_MAX); +#if defined(__MSDOS__) || defined(HAVE_GETCWD) || defined(_POSIX_VERSION) + if (!getcwd (path, PATH_MAX)) + { + msg ("Couldn't get current directory."); + exit (EX_SYSTEM); + } +#else + char *getwd (); + + if (!getwd (path)) + { + msg ("Couldn't get current directory: %s", path); + exit (EX_SYSTEM); + } +#endif + chdir_name = new_name (path, chdir_name); + free (path); + } + } + + if (name) + { + i = strlen (name); + /*NOSTRICT*/ + p = (struct name *) malloc ((unsigned) (sizeof (struct name) + i)); + } + else + p = (struct name *) malloc ((unsigned) (sizeof (struct name))); + if (!p) + { + if (name) + msg ("cannot allocate mem for name '%s'.", name); + else + msg ("cannot allocate mem for chdir record."); + exit (EX_SYSTEM); + } + p->next = (struct name *) NULL; + if (name) + { + p->fake = 0; + p->length = i; + strncpy (p->name, name, i); + p->name[i] = '\0'; /* Null term */ + } + else + p->fake = 1; + p->found = 0; + p->regexp = 0; /* Assume not a regular expression */ + p->firstch = 1; /* Assume first char is literal */ + p->change_dir = chdir_name; + p->dir_contents = 0; /* JF */ + if (name) + { + if (index (name, '*') || index (name, '[') || index (name, '?')) + { + p->regexp = 1; /* No, it's a regexp */ + if (name[0] == '*' || name[0] == '[' || name[0] == '?') + p->firstch = 0; /* Not even 1st char literal */ + } + } + + if (namelast) + namelast->next = p; + namelast = p; + if (!namelist) + namelist = p; +} + +/* + * Return nonzero if name P (from an archive) matches any name from + 
* the namelist, zero if not. + */ +int +name_match (p) + register char *p; +{ + register struct name *nlp; + register int len; + +again: + if (0 == (nlp = namelist)) /* Empty namelist is easy */ + return 1; + if (nlp->fake) + { + if (nlp->change_dir && chdir (nlp->change_dir)) + msg_perror ("Can't change to directory %d", nlp->change_dir); + namelist = 0; + return 1; + } + len = strlen (p); + for (; nlp != 0; nlp = nlp->next) + { + /* If first chars don't match, quick skip */ + if (nlp->firstch && nlp->name[0] != p[0]) + continue; + + /* Regular expressions (shell globbing, actually). */ + if (nlp->regexp) + { + if (fnmatch (nlp->name, p, FNM_LEADING_DIR) == 0) + { + nlp->found = 1; /* Remember it matched */ + if (f_startfile) + { + free ((void *) namelist); + namelist = 0; + } + if (nlp->change_dir && chdir (nlp->change_dir)) + msg_perror ("Can't change to directory %s", nlp->change_dir); + return 1; /* We got a match */ + } + continue; + } + + /* Plain Old Strings */ + if (nlp->length <= len /* Archive len >= specified */ + && (p[nlp->length] == '\0' || p[nlp->length] == '/') + /* Full match on file/dirname */ + && strncmp (p, nlp->name, nlp->length) == 0) /* Name compare */ + { + nlp->found = 1; /* Remember it matched */ + if (f_startfile) + { + free ((void *) namelist); + namelist = 0; + } + if (nlp->change_dir && chdir (nlp->change_dir)) + msg_perror ("Can't change to directory %s", nlp->change_dir); + return 1; /* We got a match */ + } + } + + /* + * Filename from archive not found in namelist. + * If we have the whole namelist here, just return 0. + * Otherwise, read the next name in and compare it. + * If this was the last name, namelist->found will remain on. + * If not, we loop to compare the newly read name. + */ + if (f_sorted_names && namelist->found) + { + name_gather (); /* Read one more */ + if (!namelist->found) + goto again; + } + return 0; +} + + +/* + * Print the names of things in the namelist that were not matched. 
+ */ +void +names_notfound () +{ + register struct name *nlp, *next; + register char *p; + + for (nlp = namelist; nlp != 0; nlp = next) + { + next = nlp->next; + if (!nlp->found) + msg ("%s not found in archive", nlp->name); + + /* + * We could free() the list, but the process is about + * to die anyway, so save some CPU time. Amigas and + * other similarly broken software will need to waste + * the time, though. + */ +#ifdef amiga + if (!f_sorted_names) + free (nlp); +#endif + } + namelist = (struct name *) NULL; + namelast = (struct name *) NULL; + + if (f_sorted_names) + { + while (0 != (p = name_next (1))) + msg ("%s not found in archive", p); + } +} + +/* These next routines were created by JF */ + +void +name_expand () +{ + ; +} + +/* This is like name_match(), except that it returns a pointer to the name + it matched, and doesn't set ->found The caller will have to do that + if it wants to. Oh, and if the namelist is empty, it returns 0, unlike + name_match(), which returns TRUE */ + +struct name * +name_scan (p) + register char *p; +{ + register struct name *nlp; + register int len; + +again: + if (0 == (nlp = namelist)) /* Empty namelist is easy */ + return 0; + len = strlen (p); + for (; nlp != 0; nlp = nlp->next) + { + /* If first chars don't match, quick skip */ + if (nlp->firstch && nlp->name[0] != p[0]) + continue; + + /* Regular expressions */ + if (nlp->regexp) + { + if (fnmatch (nlp->name, p, FNM_LEADING_DIR) == 0) + return nlp; /* We got a match */ + continue; + } + + /* Plain Old Strings */ + if (nlp->length <= len /* Archive len >= specified */ + && (p[nlp->length] == '\0' || p[nlp->length] == '/') + /* Full match on file/dirname */ + && strncmp (p, nlp->name, nlp->length) == 0) /* Name compare */ + return nlp; /* We got a match */ + } + + /* + * Filename from archive not found in namelist. + * If we have the whole namelist here, just return 0. + * Otherwise, read the next name in and compare it. 
+ * If this was the last name, namelist->found will remain on. + * If not, we loop to compare the newly read name. + */ + if (f_sorted_names && namelist->found) + { + name_gather (); /* Read one more */ + if (!namelist->found) + goto again; + } + return (struct name *) 0; +} + +/* This returns a name from the namelist which doesn't have ->found set. + It sets ->found before returning, so successive calls will find and return + all the non-found names in the namelist */ + +struct name *gnu_list_name; + +char * +name_from_list () +{ + if (!gnu_list_name) + gnu_list_name = namelist; + while (gnu_list_name && gnu_list_name->found) + gnu_list_name = gnu_list_name->next; + if (gnu_list_name) + { + gnu_list_name->found++; + if (gnu_list_name->change_dir) + if (chdir (gnu_list_name->change_dir) < 0) + msg_perror ("can't chdir to %s", gnu_list_name->change_dir); + return gnu_list_name->name; + } + return (char *) 0; +} + +void +blank_name_list () +{ + struct name *n; + + gnu_list_name = 0; + for (n = namelist; n; n = n->next) + n->found = 0; +} + +char * +new_name (path, name) + char *path, *name; +{ + char *path_buf; + + path_buf = (char *) malloc (strlen (path) + strlen (name) + 2); + if (path_buf == 0) + { + msg ("Can't allocate memory for name '%s/%s", path, name); + exit (EX_SYSTEM); + } + (void) sprintf (path_buf, "%s/%s", path, name); + return path_buf; +} + +/* returns non-zero if the luser typed 'y' or 'Y', zero otherwise. */ + +int +confirm (action, file) + char *action, *file; +{ + int c, nl; + static FILE *confirm_file = 0; + extern FILE *msg_file; + extern char TTY_NAME[]; + + fprintf (msg_file, "%s %s?", action, file); + fflush (msg_file); + if (!confirm_file) + { + confirm_file = (archive == 0) ? 
fopen (TTY_NAME, "r") : stdin; + if (!confirm_file) + { + msg ("Can't read confirmation from user"); + exit (EX_SYSTEM); + } + } + c = getc (confirm_file); + for (nl = c; nl != '\n' && nl != EOF; nl = getc (confirm_file)) + ; + return (c == 'y' || c == 'Y'); +} + +char *x_buffer = 0; +int size_x_buffer; +int free_x_buffer; + +char **exclude = 0; +int size_exclude = 0; +int free_exclude = 0; + +char **re_exclude = 0; +int size_re_exclude = 0; +int free_re_exclude = 0; + +void +add_exclude (name) + char *name; +{ + /* char *rname;*/ + /* char **tmp_ptr;*/ + int size_buf; + + un_quote_string (name); + size_buf = strlen (name); + + if (x_buffer == 0) + { + x_buffer = (char *) ck_malloc (size_buf + 1024); + free_x_buffer = 1024; + } + else if (free_x_buffer <= size_buf) + { + char *old_x_buffer; + char **tmp_ptr; + + old_x_buffer = x_buffer; + x_buffer = (char *) ck_realloc (x_buffer, size_x_buffer + 1024); + free_x_buffer = 1024; + for (tmp_ptr = exclude; tmp_ptr < exclude + size_exclude; tmp_ptr++) + *tmp_ptr = x_buffer + ((*tmp_ptr) - old_x_buffer); + for (tmp_ptr = re_exclude; tmp_ptr < re_exclude + size_re_exclude; tmp_ptr++) + *tmp_ptr = x_buffer + ((*tmp_ptr) - old_x_buffer); + } + + if (is_regex (name)) + { + if (free_re_exclude == 0) + { + re_exclude = (char **) (re_exclude ? ck_realloc (re_exclude, (size_re_exclude + 32) * sizeof (char *)): ck_malloc (sizeof (char *) * 32)); + free_re_exclude += 32; + } + re_exclude[size_re_exclude] = x_buffer + size_x_buffer; + size_re_exclude++; + free_re_exclude--; + } + else + { + if (free_exclude == 0) + { + exclude = (char **) (exclude ? 
ck_realloc (exclude, (size_exclude + 32) * sizeof (char *)): ck_malloc (sizeof (char *) * 32)); + free_exclude += 32; + } + exclude[size_exclude] = x_buffer + size_x_buffer; + size_exclude++; + free_exclude--; + } + strcpy (x_buffer + size_x_buffer, name); + size_x_buffer += size_buf + 1; + free_x_buffer -= size_buf + 1; +} + +void +add_exclude_file (file) + char *file; +{ + FILE *fp; + char buf[1024]; + + if (strcmp (file, "-")) + fp = fopen (file, "r"); + else + /* Let's hope the person knows what they're doing. */ + /* Using -X - -T - -f - will get you *REALLY* strange + results. . . */ + fp = stdin; + + if (!fp) + { + msg_perror ("can't open %s", file); + exit (2); + } + while (fgets (buf, 1024, fp)) + { + /* int size_buf;*/ + char *end_str; + + end_str = rindex (buf, '\n'); + if (end_str) + *end_str = '\0'; + add_exclude (buf); + + } + fclose (fp); +} + +int +is_regex (str) + char *str; +{ + return index (str, '*') || index (str, '[') || index (str, '?'); +} + +/* Returns non-zero if the file 'name' should not be added/extracted */ +int +check_exclude (name) + char *name; +{ + int n; + char *str; + extern char *strstr (); + + for (n = 0; n < size_re_exclude; n++) + { + if (fnmatch (re_exclude[n], name, FNM_LEADING_DIR) == 0) + return 1; + } + for (n = 0; n < size_exclude; n++) + { + /* Accept the output from strstr only if it is the last + part of the string. There is certainly a faster way to + do this. . . */ + if ((str = strstr (name, exclude[n])) + && (str == name || str[-1] == '/') + && str[strlen (exclude[n])] == '\0') + return 1; + } + return 0; +} diff --git a/gnu/usr.bin/tar/tar.h b/gnu/usr.bin/tar/tar.h new file mode 100644 index 0000000..c3fec78 --- /dev/null +++ b/gnu/usr.bin/tar/tar.h @@ -0,0 +1,291 @@ +/* Declarations for tar archives. + Copyright (C) 1988, 1992, 1993 Free Software Foundation + +This file is part of GNU Tar. 
+ +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* major() and minor() macros (among other things) defined here for hpux */ +#ifdef hpux +#include <sys/mknod.h> +#endif + +/* + * Kludge for handling systems that can't cope with multiple + * external definitions of a variable. In ONE routine (tar.c), + * we #define TAR_EXTERN to null; here, we set it to "extern" if + * it is not already set. + */ +#ifndef TAR_EXTERN +#define TAR_EXTERN extern +#endif + +/* + * Header block on tape. + * + * I'm going to use traditional DP naming conventions here. + * A "block" is a big chunk of stuff that we do I/O on. + * A "record" is a piece of info that we care about. + * Typically many "record"s fit into a "block". 
+ */ +#define RECORDSIZE 512 +#define NAMSIZ 100 +#define TUNMLEN 32 +#define TGNMLEN 32 +#define SPARSE_EXT_HDR 21 +#define SPARSE_IN_HDR 4 + +struct sparse + { + char offset[12]; + char numbytes[12]; + }; + +struct sp_array + { + int offset; + int numbytes; + }; + +union record + { + char charptr[RECORDSIZE]; + struct header + { + char arch_name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char arch_linkname[NAMSIZ]; + char magic[8]; + char uname[TUNMLEN]; + char gname[TGNMLEN]; + char devmajor[8]; + char devminor[8]; + /* these following fields were added by JF for gnu */ + /* and are NOT standard */ + char atime[12]; + char ctime[12]; + char offset[12]; + char longnames[4]; +#ifdef NEEDPAD + char pad; +#endif + struct sparse sp[SPARSE_IN_HDR]; + char isextended; + char realsize[12]; /* true size of the sparse file */ + /* char ending_blanks[12];*//* number of nulls at the + end of the file, if any */ + } + header; + struct extended_header + { + struct sparse sp[21]; + char isextended; + } + ext_hdr; + }; + +/* The checksum field is filled with this while the checksum is computed. */ +#define CHKBLANKS " " /* 8 blanks, no null */ + +/* The magic field is filled with this if uname and gname are valid. */ +#define TMAGIC "ustar " /* 7 chars and a null */ + +/* The linkflag defines the type of file */ +#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compat */ +#define LF_NORMAL '0' /* Normal disk file */ +#define LF_LINK '1' /* Link to previously dumped file */ +#define LF_SYMLINK '2' /* Symbolic link */ +#define LF_CHR '3' /* Character special file */ +#define LF_BLK '4' /* Block special file */ +#define LF_DIR '5' /* Directory */ +#define LF_FIFO '6' /* FIFO special file */ +#define LF_CONTIG '7' /* Contiguous file */ +/* Further link types may be defined later. */ + +/* Note that the standards committee allows only capital A through + capital Z for user-defined expansion. 
This means that defining something + as, say '8' is a *bad* idea. */ +#define LF_DUMPDIR 'D' /* This is a dir entry that contains + the names of files that were in + the dir at the time the dump + was made */ +#define LF_LONGLINK 'K' /* Identifies the NEXT file on the tape + as having a long linkname */ +#define LF_LONGNAME 'L' /* Identifies the NEXT file on the tape + as having a long name. */ +#define LF_MULTIVOL 'M' /* This is the continuation + of a file that began on another + volume */ +#define LF_NAMES 'N' /* For storing filenames that didn't + fit in 100 characters */ +#define LF_SPARSE 'S' /* This is for sparse files */ +#define LF_VOLHDR 'V' /* This file is a tape/volume header */ +/* Ignore it on extraction */ + +/* + * Exit codes from the "tar" program + */ +#define EX_SUCCESS 0 /* success! */ +#define EX_ARGSBAD 1 /* invalid args */ +#define EX_BADFILE 2 /* invalid filename */ +#define EX_BADARCH 3 /* bad archive */ +#define EX_SYSTEM 4 /* system gave unexpected error */ +#define EX_BADVOL 5 /* Special error code means + Tape volume doesn't match the one + specified on the command line */ + +/* + * Global variables + */ +TAR_EXTERN union record *ar_block; /* Start of block of archive */ +TAR_EXTERN union record *ar_record; /* Current record of archive */ +TAR_EXTERN union record *ar_last; /* Last+1 record of archive block */ +TAR_EXTERN char ar_reading; /* 0 writing, !0 reading archive */ +TAR_EXTERN int blocking; /* Size of each block, in records */ +TAR_EXTERN int blocksize; /* Size of each block, in bytes */ +TAR_EXTERN char *info_script; /* Script to run at end of each tape change */ +TAR_EXTERN char *name_file; /* File containing names to work on */ +TAR_EXTERN char filename_terminator; /* \n or \0. 
*/ +TAR_EXTERN char *tar; /* Name of this program */ +TAR_EXTERN struct sp_array *sparsearray; /* Pointer to the start of the scratch space */ +TAR_EXTERN int sp_array_size; /* Initial size of the sparsearray */ +TAR_EXTERN int tot_written; /* Total written to output */ +TAR_EXTERN struct re_pattern_buffer + *label_pattern; /* compiled regex for extract label */ +TAR_EXTERN char **ar_files; /* list of tape drive names */ +TAR_EXTERN int n_ar_files; /* number of tape drive names */ +TAR_EXTERN int cur_ar_file; /* tape drive currently being used */ +TAR_EXTERN int ar_files_len; /* malloced size of ar_files */ +TAR_EXTERN char *current_file_name, *current_link_name; + +/* + * Flags from the command line + */ +TAR_EXTERN int cmd_mode; +#define CMD_NONE 0 +#define CMD_CAT 1 /* -A */ +#define CMD_CREATE 2 /* -c */ +#define CMD_DIFF 3 /* -d */ +#define CMD_APPEND 4 /* -r */ +#define CMD_LIST 5 /* -t */ +#define CMD_UPDATE 6 /* -u */ +#define CMD_EXTRACT 7 /* -x */ +#define CMD_DELETE 8 /* -D */ +#define CMD_VERSION 9 /* --version */ + + +TAR_EXTERN int f_reblock; /* -B */ +#if 0 +TAR_EXTERN char f_dironly; /* -D */ +#endif +TAR_EXTERN int f_run_script_at_end; /* -F */ +TAR_EXTERN int f_gnudump; /* -G */ +TAR_EXTERN int f_follow_links; /* -h */ +TAR_EXTERN int f_ignorez; /* -i */ +TAR_EXTERN int f_keep; /* -k */ +TAR_EXTERN int f_startfile; /* -K */ +TAR_EXTERN int f_local_filesys; /* -l */ +TAR_EXTERN int tape_length; /* -L */ +TAR_EXTERN int f_modified; /* -m */ +TAR_EXTERN int f_multivol; /* -M */ +TAR_EXTERN int f_new_files; /* -N */ +TAR_EXTERN int f_oldarch; /* -o */ +TAR_EXTERN int f_exstdout; /* -O */ +TAR_EXTERN int f_use_protection;/* -p */ +TAR_EXTERN int f_absolute_paths;/* -P */ +TAR_EXTERN int f_sayblock; /* -R */ +TAR_EXTERN int f_sorted_names; /* -s */ +TAR_EXTERN int f_sparse_files; /* -S ... 
JK */ +TAR_EXTERN int f_namefile; /* -T */ +TAR_EXTERN int f_verbose; /* -v */ +TAR_EXTERN char *f_volhdr; /* -V */ +TAR_EXTERN int f_confirm; /* -w */ +TAR_EXTERN int f_verify; /* -W */ +TAR_EXTERN int f_exclude; /* -X */ +TAR_EXTERN char *f_compressprog; /* -z and -Z */ +TAR_EXTERN int f_do_chown; /* --do-chown */ +TAR_EXTERN int f_totals; /* --totals */ +TAR_EXTERN int f_remove_files; /* --remove-files */ +TAR_EXTERN int f_ignore_failed_read; /* --ignore-failed-read */ +TAR_EXTERN int f_checkpoint; /* --checkpoint */ +TAR_EXTERN int f_show_omitted_dirs; /* --show-omitted-dirs */ +TAR_EXTERN char *f_volno_file; /* --volno-file */ +TAR_EXTERN int f_force_local; /* --force-local */ +TAR_EXTERN int f_atime_preserve;/* --atime-preserve */ +TAR_EXTERN int f_compress_block; /* --compress-block */ + +/* + * We default to Unix Standard format rather than 4.2BSD tar format. + * The code can actually produce all three: + * f_standard ANSI standard + * f_oldarch V7 + * neither 4.2BSD + * but we don't bother, since 4.2BSD can read ANSI standard format anyway. + * The only advantage to the "neither" option is that we can cmp our + * output to the output of 4.2BSD tar, for debugging. + */ +#define f_standard (!f_oldarch) + +/* + * Structure for keeping track of filenames and lists thereof. 
+ */ +struct name + { + struct name *next; + short length; /* cached strlen(name) */ + char found; /* A matching file has been found */ + char firstch; /* First char is literally matched */ + char regexp; /* This name is a regexp, not literal */ + char *change_dir; /* JF set with the -C option */ + char *dir_contents; /* JF for f_gnudump */ + char fake; /* dummy entry */ + char name[1]; + }; + +TAR_EXTERN struct name *namelist; /* Points to first name in list */ +TAR_EXTERN struct name *namelast; /* Points to last name in list */ + +TAR_EXTERN int archive; /* File descriptor for archive file */ +TAR_EXTERN int errors; /* # of files in error */ + +TAR_EXTERN char *gnu_dumpfile; + +/* + * Error recovery stuff + */ +TAR_EXTERN char read_error_flag; + + +/* + * Declarations of functions available to the world. + */ +union record *findrec (); +void userec (); +union record *endofrecs (); +void anno (); + +#if defined (HAVE_VPRINTF) && __STDC__ +void msg (char *,...); +void msg_perror (char *,...); +#else +void msg (); +void msg_perror (); +#endif diff --git a/gnu/usr.bin/tar/update.c b/gnu/usr.bin/tar/update.c new file mode 100644 index 0000000..a64317c --- /dev/null +++ b/gnu/usr.bin/tar/update.c @@ -0,0 +1,585 @@ +/* Update a tar archive. + Copyright (C) 1988, 1992 Free Software Foundation + +This file is part of GNU Tar. + +GNU Tar is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Tar is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Tar; see the file COPYING. 
If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* JF implement the 'r' 'u' and 'A' options for tar. */ +/* The 'A' option is my own invention: It means that the file-names are + tar files, and they should simply be appended to the end of the archive. + No attempt is made to block the reads from the args; if they're on raw + tape or something like that, it'll probably lose. . . */ + +#include <sys/types.h> +#include <stdio.h> +#include <errno.h> +#ifndef STDC_HEADERS +extern int errno; +#endif + +#ifdef HAVE_SYS_MTIO_H +#include <sys/ioctl.h> +#include <sys/mtio.h> +#endif + +#ifdef BSD42 +#include <sys/file.h> +#else +#ifndef V7 +#include <fcntl.h> +#endif +#endif + +#ifndef __MSDOS__ +#include <pwd.h> +#include <grp.h> +#endif + +#define STDIN 0 +#define STDOUT 1 + +#include "tar.h" +#include "port.h" +#include "rmt.h" + +int time_to_start_writing = 0; /* We've hit the end of the old stuff, + and its time to start writing new stuff + to the tape. This involves seeking + back one block and re-writing the current + block (which has been changed). */ + +char *output_start; /* Pointer to where we started to write in + the first block we write out. 
This is used + if we can't backspace the output and have + to null out the first part of the block */ + +extern void skip_file (); +extern void skip_extended_headers (); + +extern union record *head; +extern struct stat hstat; + +void append_file (); +void close_archive (); +int confirm (); +void decode_header (); +void fl_read (); +void fl_write (); +void flush_archive (); +int move_arch (); +struct name *name_scan (); +char *name_from_list (); +void name_expand (); +void name_gather (); +void names_notfound (); +void open_archive (); +int read_header (); +void reset_eof (); +void write_block (); +void write_eot (); + +/* Implement the 'r' (add files to end of archive), and 'u' (add files to + end of archive if they arent there, or are more up to date than the + version in the archive.) commands.*/ +void +update_archive () +{ + int found_end = 0; + int status = 3; + int prev_status; + char *p; + struct name *name; + extern void dump_file (); + + name_gather (); + if (cmd_mode == CMD_UPDATE) + name_expand (); + open_archive (2); /* Open for updating */ + + do + { + prev_status = status; + status = read_header (); + switch (status) + { + case EOF: + found_end = 1; + break; + + case 0: /* A bad record */ + userec (head); + switch (prev_status) + { + case 3: + msg ("This doesn't look like a tar archive."); + /* FALL THROUGH */ + case 2: + case 1: + msg ("Skipping to next header"); + case 0: + break; + } + break; + + /* A good record */ + case 1: + /* printf("File %s\n",head->header.name); */ + /* head->header.name[NAMSIZ-1]='\0'; */ + if (cmd_mode == CMD_UPDATE && (name = name_scan (current_file_name))) + { + + /* struct stat hstat; */ + struct stat nstat; + int head_standard; + + decode_header (head, &hstat, &head_standard, 0); + if (stat (current_file_name, &nstat) < 0) + { + msg_perror ("can't stat %s:", current_file_name); + } + else + { + if (hstat.st_mtime >= nstat.st_mtime) + name->found++; + } + } + userec (head); + if (head->header.isextended) + 
skip_extended_headers (); + skip_file ((long) hstat.st_size); + break; + + case 2: + ar_record = head; + found_end = 1; + break; + } + } + while (!found_end); + + reset_eof (); + time_to_start_writing = 1; + output_start = ar_record->charptr; + + while (p = name_from_list ()) + { + if (f_confirm && !confirm ("add", p)) + continue; + if (cmd_mode == CMD_CAT) + append_file (p); + else + dump_file (p, -1, 1); + } + + write_eot (); + close_archive (); + names_notfound (); +} + +/* Catenate file p to the archive without creating a header for it. It had + better be a tar file or the archive is screwed */ + +void +append_file (p) + char *p; +{ + int fd; + struct stat statbuf; + long bytes_left; + union record *start; + long bufsiz, count; + + if (0 != stat (p, &statbuf) || (fd = open (p, O_RDONLY | O_BINARY)) < 0) + { + msg_perror ("can't open file %s", p); + errors++; + return; + } + + bytes_left = statbuf.st_size; + + while (bytes_left > 0) + { + start = findrec (); + bufsiz = endofrecs ()->charptr - start->charptr; + if (bytes_left < bufsiz) + { + bufsiz = bytes_left; + count = bufsiz % RECORDSIZE; + if (count) + bzero (start->charptr + bytes_left, (int) (RECORDSIZE - count)); + } + count = read (fd, start->charptr, bufsiz); + if (count < 0) + { + msg_perror ("read error at byte %ld reading %d bytes in file %s", statbuf.st_size - bytes_left, bufsiz, p); + exit (EX_ARGSBAD); /* FOO */ + } + bytes_left -= count; + userec (start + (count - 1) / RECORDSIZE); + if (count != bufsiz) + { + msg ("%s: file shrunk by %d bytes, yark!", p, bytes_left); + abort (); + } + } + (void) close (fd); +} + +#ifdef DONTDEF +bprint (fp, buf, num) + FILE *fp; + char *buf; +{ + int c; + + if (num == 0 || num == -1) + return; + fputs (" '", fp); + while (num--) + { + c = *buf++; + if (c == '\\') + fputs ("\\\\", fp); + else if (c >= ' ' && c <= '~') + putc (c, fp); + else + switch (c) + { + case '\n': + fputs ("\\n", fp); + break; + case '\r': + fputs ("\\r", fp); + break; + case '\b': + fputs 
("\\b", fp); + break; + case '\0': + /* fputs("\\-",fp); */ + break; + default: + fprintf (fp, "\\%03o", c); + break; + } + } + fputs ("'\n", fp); +} + +#endif + +int number_of_blocks_read = 0; + +int number_of_new_records = 0; +int number_of_records_needed = 0; + +union record *new_block = 0; +union record *save_block = 0; + +void +junk_archive () +{ + int found_stuff = 0; + int status = 3; + int prev_status; + struct name *name; + + /* int dummy_head; */ + int number_of_records_to_skip = 0; + int number_of_records_to_keep = 0; + int number_of_kept_records_in_block; + int sub_status; + extern int write_archive_to_stdout; + + /* fprintf(stderr,"Junk files\n"); */ + name_gather (); + open_archive (2); + + while (!found_stuff) + { + prev_status = status; + status = read_header (); + switch (status) + { + case EOF: + found_stuff = 1; + break; + + case 0: + userec (head); + switch (prev_status) + { + case 3: + msg ("This doesn't look like a tar archive."); + /* FALL THROUGH */ + case 2: + case 1: + msg ("Skipping to next header"); + /* FALL THROUGH */ + case 0: + break; + } + break; + + case 1: + /* head->header.name[NAMSIZ-1] = '\0'; */ + /* fprintf(stderr,"file %s\n",head->header.name); */ + if ((name = name_scan (current_file_name)) == (struct name *) 0) + { + userec (head); + /* fprintf(stderr,"Skip %ld\n",(long)(hstat.st_size)); */ + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) (hstat.st_size)); + break; + } + name->found = 1; + found_stuff = 2; + break; + + case 2: + found_stuff = 1; + break; + } + } + /* fprintf(stderr,"Out of first loop\n"); */ + + if (found_stuff != 2) + { + write_eot (); + close_archive (); + names_notfound (); + return; + } + + if (write_archive_to_stdout) + write_archive_to_stdout = 0; + new_block = (union record *) malloc (blocksize); + if (new_block == 0) + { + msg ("Can't allocate secondary block of %d bytes", blocksize); + exit (EX_SYSTEM); + } + + /* Save away records before this one in this block */ + 
number_of_new_records = ar_record - ar_block; + number_of_records_needed = blocking - number_of_new_records; + if (number_of_new_records) + bcopy ((void *) ar_block, (void *) new_block, (number_of_new_records) * RECORDSIZE); + + /* fprintf(stderr,"Saved %d recs, need %d more\n",number_of_new_records,number_of_records_needed); */ + userec (head); + if (head->header.isextended) + skip_extended_headers (); + skip_file ((long) (hstat.st_size)); + found_stuff = 0; + /* goto flush_file; */ + + for (;;) + { + /* Fill in a block */ + /* another_file: */ + if (ar_record == ar_last) + { + /* fprintf(stderr,"New block\n"); */ + flush_archive (); + number_of_blocks_read++; + } + sub_status = read_header (); + /* fprintf(stderr,"Header type %d\n",sub_status); */ + + if (sub_status == 2 && f_ignorez) + { + userec (head); + continue; + } + if (sub_status == EOF || sub_status == 2) + { + found_stuff = 1; + bzero (new_block[number_of_new_records].charptr, RECORDSIZE * number_of_records_needed); + number_of_new_records += number_of_records_needed; + number_of_records_needed = 0; + write_block (0); + break; + } + + if (sub_status == 0) + { + msg ("Deleting non-header from archive."); + userec (head); + continue; + } + + /* Found another header. Yipee! 
*/ + /* head->header.name[NAMSIZ-1] = '\0'; */ + /* fprintf(stderr,"File %s ",head->header.name); */ + if (name = name_scan (current_file_name)) + { + name->found = 1; + /* fprintf(stderr,"Flush it\n"); */ + /* flush_file: */ + /* decode_header(head,&hstat,&dummy_head,0); */ + userec (head); + number_of_records_to_skip = (hstat.st_size + RECORDSIZE - 1) / RECORDSIZE; + /* fprintf(stderr,"Flushing %d recs from %s\n",number_of_records_to_skip,head->header.name); */ + + while (ar_last - ar_record <= number_of_records_to_skip) + { + + /* fprintf(stderr,"Block: %d <= %d ",ar_last-ar_record,number_of_records_to_skip); */ + number_of_records_to_skip -= (ar_last - ar_record); + flush_archive (); + number_of_blocks_read++; + /* fprintf(stderr,"Block %d left\n",number_of_records_to_skip); */ + } + ar_record += number_of_records_to_skip; + /* fprintf(stderr,"Final %d\n",number_of_records_to_skip); */ + number_of_records_to_skip = 0; + continue; + } + + /* copy_header: */ + new_block[number_of_new_records] = *head; + number_of_new_records++; + number_of_records_needed--; + number_of_records_to_keep = (hstat.st_size + RECORDSIZE - 1) / RECORDSIZE; + userec (head); + if (number_of_records_needed == 0) + write_block (1); + /* copy_data: */ + number_of_kept_records_in_block = ar_last - ar_record; + if (number_of_kept_records_in_block > number_of_records_to_keep) + number_of_kept_records_in_block = number_of_records_to_keep; + + /* fprintf(stderr,"Need %d kept_in %d keep %d\n",blocking,number_of_kept_records_in_block,number_of_records_to_keep); */ + + while (number_of_records_to_keep) + { + int n; + + if (ar_record == ar_last) + { + /* fprintf(stderr,"Flush. . 
.\n"); */ + fl_read (); + number_of_blocks_read++; + ar_record = ar_block; + number_of_kept_records_in_block = blocking; + if (number_of_kept_records_in_block > number_of_records_to_keep) + number_of_kept_records_in_block = number_of_records_to_keep; + } + n = number_of_kept_records_in_block; + if (n > number_of_records_needed) + n = number_of_records_needed; + + /* fprintf(stderr,"Copying %d\n",n); */ + bcopy ((void *) ar_record, (void *) (new_block + number_of_new_records), n * RECORDSIZE); + number_of_new_records += n; + number_of_records_needed -= n; + ar_record += n; + number_of_records_to_keep -= n; + number_of_kept_records_in_block -= n; + /* fprintf(stderr,"Now new %d need %d keep %d keep_in %d rec %d/%d\n", + number_of_new_records,number_of_records_needed,number_of_records_to_keep, + number_of_kept_records_in_block,ar_record-ar_block,ar_last-ar_block); */ + + if (number_of_records_needed == 0) + { + write_block (1); + } + } + } + + write_eot (); + close_archive (); + names_notfound (); +} + +void +write_block (f) + int f; +{ + /* fprintf(stderr,"Write block\n"); */ + /* We've filled out a block. Write it out. */ + + /* Backspace back to where we started. . . */ + if (archive != STDIN) + (void) move_arch (-(number_of_blocks_read + 1)); + + save_block = ar_block; + ar_block = new_block; + + if (archive == STDIN) + archive = STDOUT; + fl_write (); + + if (archive == STDOUT) + archive = STDIN; + ar_block = save_block; + + if (f) + { + /* Move the tape head back to where we were */ + if (archive != STDIN) + (void) move_arch (number_of_blocks_read); + number_of_blocks_read--; + } + + number_of_records_needed = blocking; + number_of_new_records = 0; +} + +/* Move archive descriptor by n blocks worth. If n is positive we move + forward, else we move negative. If its a tape, MTIOCTOP had better + work. If its something else, we try to seek on it. If we can't + seek, we lose! 
*/ +int +move_arch (n) + int n; +{ + long cur; + +#ifdef MTIOCTOP + struct mtop t; + int er; + + if (n > 0) + { + t.mt_op = MTFSR; + t.mt_count = n; + } + else + { + t.mt_op = MTBSR; + t.mt_count = -n; + } + if ((er = rmtioctl (archive, MTIOCTOP, &t)) >= 0) + return 1; + if (errno == EIO && (er = rmtioctl (archive, MTIOCTOP, &t)) >= 0) + return 1; +#endif + + cur = rmtlseek (archive, 0L, 1); + cur += blocksize * n; + + /* fprintf(stderr,"Fore to %x\n",cur); */ + if (rmtlseek (archive, cur, 0) != cur) + { + /* Lseek failed. Try a different method */ + msg ("Couldn't re-position archive file."); + exit (EX_BADARCH); + } + return 3; +} diff --git a/gnu/usr.bin/tar/version.c b/gnu/usr.bin/tar/version.c new file mode 100644 index 0000000..4454f62 --- /dev/null +++ b/gnu/usr.bin/tar/version.c @@ -0,0 +1 @@ +char version_string[] = "GNU tar version 1.11.2"; |