summaryrefslogtreecommitdiffstats
path: root/contrib/perl5/toke.c
diff options
context:
space:
mode:
authormarkm <markm@FreeBSD.org>2002-03-16 20:14:30 +0000
committermarkm <markm@FreeBSD.org>2002-03-16 20:14:30 +0000
commit3eac21f49bc763a6c0044b4afbc0c7ece760144f (patch)
tree4cf1274fa3ca68f7ecf6a3051e0c2243e378afc5 /contrib/perl5/toke.c
parent259bd53c06712c4ffb0ab7e06898c19ebf221b21 (diff)
downloadFreeBSD-src-3eac21f49bc763a6c0044b4afbc0c7ece760144f.zip
FreeBSD-src-3eac21f49bc763a6c0044b4afbc0c7ece760144f.tar.gz
Vendor import Perl 5.6.1
Diffstat (limited to 'contrib/perl5/toke.c')
-rw-r--r--contrib/perl5/toke.c1113
1 files changed, 704 insertions, 409 deletions
diff --git a/contrib/perl5/toke.c b/contrib/perl5/toke.c
index 48dad64..d33b95d 100644
--- a/contrib/perl5/toke.c
+++ b/contrib/perl5/toke.c
@@ -1,6 +1,6 @@
/* toke.c
*
- * Copyright (c) 1991-2000, Larry Wall
+ * Copyright (c) 1991-2001, Larry Wall
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
@@ -13,7 +13,7 @@
/*
* This file is the lexer for Perl. It's closely linked to the
- * parser, perly.y.
+ * parser, perly.y.
*
* The main routine is yylex(), which returns the next token.
*/
@@ -28,6 +28,10 @@
static char ident_too_long[] = "Identifier too long";
static void restore_rsfp(pTHXo_ void *f);
+#ifndef PERL_NO_UTF16_FILTER
+static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
+static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
+#endif
#define XFAKEBRACK 128
#define XENUMMASK 127
@@ -35,10 +39,17 @@ static void restore_rsfp(pTHXo_ void *f);
/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
#define UTF (PL_hints & HINT_UTF8)
-/* In variables name $^X, these are the legal values for X.
+/* In variables name $^X, these are the legal values for X.
* 1999-02-27 mjd-perl-patch@plover.com */
#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
+/* On MacOS, respect nonbreaking spaces */
+#ifdef MACOS_TRADITIONAL
+#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
+#else
+#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
+#endif
+
/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
@@ -58,26 +69,24 @@ static void restore_rsfp(pTHXo_ void *f);
#define LEX_FORMLINE 1
#define LEX_KNOWNEXT 0
-/* XXX If this causes problems, set i_unistd=undef in the hint file. */
-#ifdef I_UNISTD
-# include <unistd.h> /* Needed for execv() */
-#endif
-
-
#ifdef ff_next
#undef ff_next
#endif
#ifdef USE_PURE_BISON
-YYSTYPE* yylval_pointer = NULL;
-int* yychar_pointer = NULL;
+# ifndef YYMAXLEVEL
+# define YYMAXLEVEL 100
+# endif
+YYSTYPE* yylval_pointer[YYMAXLEVEL];
+int* yychar_pointer[YYMAXLEVEL];
+int yyactlevel = -1;
# undef yylval
# undef yychar
-# define yylval (*yylval_pointer)
-# define yychar (*yychar_pointer)
-# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
+# define yylval (*yylval_pointer[yyactlevel])
+# define yychar (*yychar_pointer[yyactlevel])
+# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
# undef yylex
-# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
+# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
#endif
#include "keywords.h"
@@ -112,7 +121,7 @@ int* yychar_pointer = NULL;
* Aop : addition-level operator
* Mop : multiplication-level operator
* Eop : equality-testing operator
- * Rop : relational operator <= != gt
+ * Rop : relational operator <= != gt
*
* Also see LOP and lop() below.
*/
@@ -198,10 +207,8 @@ S_no_op(pTHX_ char *what, char *s)
if (!s)
s = oldbp;
- else {
- assert(s >= oldbp);
+ else
PL_bufptr = s;
- }
yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
if (is_first)
Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
@@ -212,8 +219,10 @@ S_no_op(pTHX_ char *what, char *s)
Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
t - PL_oldoldbufptr, PL_oldoldbufptr);
}
- else
+ else {
+ assert(s >= oldbp);
Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
+ }
PL_bufptr = oldbp;
}
@@ -265,7 +274,6 @@ S_missingterm(pTHX_ char *s)
void
Perl_deprecate(pTHX_ char *s)
{
- dTHR;
if (ckWARN(WARN_DEPRECATED))
Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
}
@@ -319,36 +327,6 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
}
#endif
-#if 0
-STATIC I32
-S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
-{
- I32 count = FILTER_READ(idx+1, sv, maxlen);
- if (count) {
- U8* tmps;
- U8* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, (char*)tmps, tend - tmps);
- }
- return count;
-}
-
-STATIC I32
-S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
-{
- I32 count = FILTER_READ(idx+1, sv, maxlen);
- if (count) {
- U8* tmps;
- U8* tend;
- New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
- sv_usepvn(sv, (char*)tmps, tend - tmps);
- }
- return count;
-}
-#endif
-
/*
* Perl_lex_start
* Initialize variables. Uses the Perl save_stack to save its state (for
@@ -358,7 +336,6 @@ S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
void
Perl_lex_start(pTHX_ SV *line)
{
- dTHR;
char *s;
STRLEN len;
@@ -376,13 +353,14 @@ Perl_lex_start(pTHX_ SV *line)
SAVEVPTR(PL_nextval[toke]);
}
SAVEI32(PL_nexttoke);
- PL_nexttoke = 0;
}
SAVECOPLINE(PL_curcop);
SAVEPPTR(PL_bufptr);
SAVEPPTR(PL_bufend);
SAVEPPTR(PL_oldbufptr);
SAVEPPTR(PL_oldoldbufptr);
+ SAVEPPTR(PL_last_lop);
+ SAVEPPTR(PL_last_uni);
SAVEPPTR(PL_linestart);
SAVESPTR(PL_linestr);
SAVEPPTR(PL_lex_brackstack);
@@ -410,6 +388,7 @@ Perl_lex_start(pTHX_ SV *line)
PL_lex_stuff = Nullsv;
PL_lex_repl = Nullsv;
PL_lex_inpat = 0;
+ PL_nexttoke = 0;
PL_lex_inwhat = 0;
PL_sublex_info.sub_inwhat = 0;
PL_linestr = line;
@@ -424,6 +403,7 @@ Perl_lex_start(pTHX_ SV *line)
SvTEMP_off(PL_linestr);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend = PL_bufptr + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SvREFCNT_dec(PL_rs);
PL_rs = newSVpvn("\n", 1);
PL_rsfp = 0;
@@ -454,7 +434,6 @@ Perl_lex_end(pTHX)
STATIC void
S_incline(pTHX_ char *s)
{
- dTHR;
char *t;
char *n;
char *e;
@@ -463,22 +442,22 @@ S_incline(pTHX_ char *s)
CopLINE_inc(PL_curcop);
if (*s++ != '#')
return;
- while (*s == ' ' || *s == '\t') s++;
+ while (SPACE_OR_TAB(*s)) s++;
if (strnEQ(s, "line", 4))
s += 4;
else
return;
- if (*s == ' ' || *s == '\t')
+ if (SPACE_OR_TAB(*s))
s++;
- else
+ else
return;
- while (*s == ' ' || *s == '\t') s++;
+ while (SPACE_OR_TAB(*s)) s++;
if (!isDIGIT(*s))
return;
n = s;
while (isDIGIT(*s))
s++;
- while (*s == ' ' || *s == '\t')
+ while (SPACE_OR_TAB(*s))
s++;
if (*s == '"' && (t = strchr(s+1, '"'))) {
s++;
@@ -488,15 +467,21 @@ S_incline(pTHX_ char *s)
for (t = s; !isSPACE(*t); t++) ;
e = t;
}
- while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f')
+ while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
e++;
if (*e != '\n' && *e != '\0')
return; /* false alarm */
ch = *t;
*t = '\0';
- if (t - s > 0)
+ if (t - s > 0) {
+#ifdef USE_ITHREADS
+ Safefree(CopFILE(PL_curcop));
+#else
+ SvREFCNT_dec(CopFILEGV(PL_curcop));
+#endif
CopFILE_set(PL_curcop, s);
+ }
*t = ch;
CopLINE_set(PL_curcop, atoi(n)-1);
}
@@ -510,9 +495,8 @@ S_incline(pTHX_ char *s)
STATIC char *
S_skipspace(pTHX_ register char *s)
{
- dTHR;
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
- while (s < PL_bufend && (*s == ' ' || *s == '\t'))
+ while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
return s;
}
@@ -565,6 +549,7 @@ S_skipspace(pTHX_ register char *s)
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
= SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
/* Close the filehandle. Could be from -P preprocessor,
* STDIN, or a regular file. If we were reading code from
@@ -629,7 +614,6 @@ S_check_uni(pTHX)
{
char *s;
char *t;
- dTHR;
if (PL_oldoldbufptr != PL_last_uni)
return;
@@ -641,8 +625,8 @@ S_check_uni(pTHX)
if (ckWARN_d(WARN_AMBIGUOUS)){
char ch = *s;
*s = '\0';
- Perl_warner(aTHX_ WARN_AMBIGUOUS,
- "Warning: Use of \"%s\" without parens is ambiguous",
+ Perl_warner(aTHX_ WARN_AMBIGUOUS,
+ "Warning: Use of \"%s\" without parens is ambiguous",
PL_last_uni);
*s = ch;
}
@@ -695,7 +679,6 @@ S_uni(pTHX_ I32 f, char *s)
STATIC I32
S_lop(pTHX_ I32 f, int x, char *s)
{
- dTHR;
yylval.ival = f;
CLINE;
PL_expect = x;
@@ -722,7 +705,7 @@ S_lop(pTHX_ I32 f, int x, char *s)
* handles the token correctly.
*/
-STATIC void
+STATIC void
S_force_next(pTHX_ I32 type)
{
PL_nexttype[PL_nexttoke] = type;
@@ -755,7 +738,7 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
{
register char *s;
STRLEN len;
-
+
start = skipspace(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF) ||
@@ -797,7 +780,6 @@ S_force_ident(pTHX_ register char *s, int kind)
PL_nextval[PL_nexttoke].opval = o;
force_next(WORD);
if (kind) {
- dTHR; /* just for in_eval */
o->op_private = OPpCONST_ENTERED;
/* XXX see note in pp_entereval() for why we forgo typo
warnings if the symbol must be introduced in an eval.
@@ -819,13 +801,13 @@ Perl_str_to_version(pTHX_ SV *sv)
NV nshift = 1.0;
STRLEN len;
char *start = SvPVx(sv,len);
- bool utf = SvUTF8(sv);
+ bool utf = SvUTF8(sv) ? TRUE : FALSE;
char *end = start + len;
while (start < end) {
- I32 skip;
+ STRLEN skip;
UV n;
if (utf)
- n = utf8_to_uv((U8*)start, &skip);
+ n = utf8_to_uv((U8*)start, len, &skip, 0);
else {
n = *(U8*)start;
skip = 1;
@@ -837,7 +819,7 @@ Perl_str_to_version(pTHX_ SV *sv)
return retval;
}
-/*
+/*
* S_force_version
* Forces the next token to be a version number.
*/
@@ -857,7 +839,7 @@ S_force_version(pTHX_ char *s)
for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
SV *ver;
- s = scan_num(s);
+ s = scan_num(s, &yylval);
version = yylval.opval;
ver = cSVOPx(version)->op_sv;
if (SvPOK(ver) && !SvNIOK(ver)) {
@@ -870,7 +852,7 @@ S_force_version(pTHX_ char *s)
/* NOTE: The parser sees the package name and the VERSION swapped */
PL_nextval[PL_nexttoke].opval = version;
- force_next(WORD);
+ force_next(WORD);
return (s);
}
@@ -896,7 +878,7 @@ S_tokeq(pTHX_ SV *sv)
goto finish;
s = SvPV_force(sv, len);
- if (SvIVX(sv) == -1)
+ if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
goto finish;
send = s + len;
while (s < send && *s != '\\')
@@ -974,9 +956,11 @@ S_sublex_start(pTHX)
p = SvPV(sv, len);
nsv = newSVpvn(p, len);
+ if (SvUTF8(sv))
+ SvUTF8_on(nsv);
SvREFCNT_dec(sv);
sv = nsv;
- }
+ }
yylval.opval = (OP*)newSVOP(op_type, 0, sv);
PL_lex_stuff = Nullsv;
return THING;
@@ -1008,7 +992,6 @@ S_sublex_start(pTHX)
STATIC I32
S_sublex_push(pTHX)
{
- dTHR;
ENTER;
PL_lex_state = PL_sublex_info.super_state;
@@ -1023,6 +1006,8 @@ S_sublex_push(pTHX)
SAVEPPTR(PL_bufptr);
SAVEPPTR(PL_oldbufptr);
SAVEPPTR(PL_oldoldbufptr);
+ SAVEPPTR(PL_last_lop);
+ SAVEPPTR(PL_last_uni);
SAVEPPTR(PL_linestart);
SAVESPTR(PL_linestr);
SAVEPPTR(PL_lex_brackstack);
@@ -1034,6 +1019,7 @@ S_sublex_push(pTHX)
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
@@ -1066,8 +1052,11 @@ STATIC I32
S_sublex_done(pTHX)
{
if (!PL_lex_starts++) {
+ SV *sv = newSVpvn("",0);
+ if (SvUTF8(PL_linestr))
+ SvUTF8_on(sv);
PL_expect = XOPERATOR;
- yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
+ yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
return THING;
}
@@ -1082,6 +1071,7 @@ S_sublex_done(pTHX)
PL_lex_inpat = 0;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
PL_lex_brackets = 0;
@@ -1182,7 +1172,7 @@ S_sublex_done(pTHX)
} (end switch)
} (end if backslash)
} (end while character to read)
-
+
*/
STATIC char *
@@ -1193,14 +1183,13 @@ S_scan_const(pTHX_ char *start)
register char *s = start; /* start of the constant */
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
- bool has_utf = FALSE; /* embedded \x{} */
- I32 len; /* ? */
+ bool has_utf8 = FALSE; /* embedded \x{} */
UV uv;
I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
: UTF;
- I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
+ I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
: UTF;
@@ -1280,9 +1269,9 @@ S_scan_const(pTHX_ char *start)
while (count && (c = *regparse)) {
if (c == '\\' && regparse[1])
regparse++;
- else if (c == '{')
+ else if (c == '{')
count++;
- else if (c == '}')
+ else if (c == '}')
count--;
regparse++;
}
@@ -1317,27 +1306,10 @@ S_scan_const(pTHX_ char *start)
break; /* in regexp, $ might be tail anchor */
}
- /* (now in tr/// code again) */
-
- if (*s & 0x80 && thisutf) {
- (void)utf8_to_uv((U8*)s, &len);
- if (len == 1) {
- /* illegal UTF8, make it valid */
- char *old_pvx = SvPVX(sv);
- /* need space for one extra char (NOTE: SvCUR() not set here) */
- d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
- d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
- }
- else {
- while (len--)
- *d++ = *s++;
- }
- has_utf = TRUE;
- continue;
- }
-
/* backslashes */
if (*s == '\\' && s+1 < send) {
+ bool to_be_utf8 = FALSE;
+
s++;
/* some backslashes we leave behind */
@@ -1351,7 +1323,6 @@ S_scan_const(pTHX_ char *start)
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
- dTHR; /* only for ckWARN */
if (ckWARN(WARN_SYNTAX))
Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
*--s = '$';
@@ -1376,21 +1347,22 @@ S_scan_const(pTHX_ char *start)
/* FALL THROUGH */
default:
{
- dTHR;
if (ckWARN(WARN_MISC) && isALPHA(*s))
Perl_warner(aTHX_ WARN_MISC,
"Unrecognized escape \\%c passed through",
*s);
/* default action is to copy the quoted character */
- *d++ = *s++;
- continue;
+ goto default_action;
}
/* \132 indicates an octal constant */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
- uv = (UV)scan_oct(s, 3, &len);
- s += len;
+ {
+ STRLEN len = 0; /* disallow underscores */
+ uv = (UV)scan_oct(s, 3, &len);
+ s += len;
+ }
goto NUM_ESCAPE_INSERT;
/* \x24 indicates a hex constant */
@@ -1402,44 +1374,64 @@ S_scan_const(pTHX_ char *start)
yyerror("Missing right brace on \\x{}");
e = s;
}
- uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- s = e + 1;
+ else {
+ STRLEN len = 1; /* allow underscores */
+ uv = (UV)scan_hex(s + 1, e - s - 1, &len);
+ to_be_utf8 = TRUE;
+ }
+ s = e + 1;
}
else {
- uv = (UV)scan_hex(s, 2, &len);
- s += len;
+ {
+ STRLEN len = 0; /* disallow underscores */
+ uv = (UV)scan_hex(s, 2, &len);
+ s += len;
+ }
}
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character.
- * There will always enough room in sv since such escapes will
- * be longer than any utf8 sequence they can end up as
- */
+ * There will always enough room in sv since such
+ * escapes will be longer than any UT-F8 sequence
+ * they can end up as. */
+
+ /* This spot is wrong for EBCDIC. Characters like
+ * the lowercase letters and digits are >127 in EBCDIC,
+ * so here they would need to be mapped to the Unicode
+ * repertoire. --jhi */
+
if (uv > 127) {
- if (!thisutf && !has_utf && uv > 255) {
- /* might need to recode whatever we have accumulated so far
- * if it contains any hibit chars
+ if (!has_utf8 && (to_be_utf8 || uv > 255)) {
+ /* Might need to recode whatever we have
+ * accumulated so far if it contains any
+ * hibit chars.
+ *
+ * (Can't we keep track of that and avoid
+ * this rescan? --jhi)
*/
int hicount = 0;
char *c;
+
for (c = SvPVX(sv); c < d; c++) {
- if (*c & 0x80)
+ if (UTF8_IS_CONTINUED(*c))
hicount++;
}
if (hicount) {
char *old_pvx = SvPVX(sv);
char *src, *dst;
- d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
+
+ d = SvGROW(sv,
+ SvCUR(sv) + hicount + 1) +
+ (d - old_pvx);
src = d - 1;
d += hicount;
dst = d - 1;
while (src < dst) {
- if (*src & 0x80) {
- dst--;
- uv_to_utf8((U8*)dst, (U8)*src--);
- dst--;
+ if (UTF8_IS_CONTINUED(*src)) {
+ *dst-- = UTF8_EIGHT_BIT_LO(*src);
+ *dst-- = UTF8_EIGHT_BIT_HI(*src--);
}
else {
*dst-- = *src--;
@@ -1448,9 +1440,16 @@ S_scan_const(pTHX_ char *start)
}
}
- if (thisutf || uv > 255) {
+ if (to_be_utf8 || has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
- has_utf = TRUE;
+ has_utf8 = TRUE;
+ if (PL_lex_inwhat == OP_TRANS &&
+ PL_sublex_info.sub_op) {
+ PL_sublex_info.sub_op->op_private |=
+ (PL_lex_repl ? OPpTRANS_FROM_UTF
+ : OPpTRANS_TO_UTF);
+ utf = TRUE;
+ }
}
else {
*d++ = (char)uv;
@@ -1469,23 +1468,28 @@ S_scan_const(pTHX_ char *start)
SV *res;
STRLEN len;
char *str;
-
+
if (!e) {
yyerror("Missing right brace on \\N{}");
e = s - 1;
goto cont_scan;
}
res = newSVpvn(s + 1, e - s - 1);
- res = new_constant( Nullch, 0, "charnames",
+ res = new_constant( Nullch, 0, "charnames",
res, Nullsv, "\\N{...}" );
+ if (has_utf8)
+ sv_utf8_upgrade(res);
str = SvPV(res,len);
- if (!has_utf && SvUTF8(res)) {
+ if (!has_utf8 && SvUTF8(res)) {
char *ostart = SvPVX(sv);
SvCUR_set(sv, d - ostart);
SvPOK_on(sv);
+ *d = '\0';
sv_utf8_upgrade(sv);
+ /* this just broke our allocation above... */
+ SvGROW(sv, send - start);
d = SvPVX(sv) + SvCUR(sv);
- has_utf = TRUE;
+ has_utf8 = TRUE;
}
if (len > e - s + 4) {
char *odest = SvPVX(sv);
@@ -1510,10 +1514,13 @@ S_scan_const(pTHX_ char *start)
*d = *s++;
if (isLOWER(*d))
*d = toUPPER(*d);
- *d++ = toCTRL(*d);
+ *d = toCTRL(*d);
+ d++;
#else
- len = *s++;
- *d++ = toCTRL(len);
+ {
+ U8 c = *s++;
+ *d++ = toCTRL(c);
+ }
#endif
continue;
@@ -1554,14 +1561,41 @@ S_scan_const(pTHX_ char *start)
continue;
} /* end if (backslash) */
- *d++ = *s++;
+ default_action:
+ if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
+ STRLEN len = (STRLEN) -1;
+ UV uv;
+ if (this_utf8) {
+ uv = utf8_to_uv((U8*)s, send - s, &len, 0);
+ }
+ if (len == (STRLEN)-1) {
+ /* Illegal UTF8 (a high-bit byte), make it valid. */
+ char *old_pvx = SvPVX(sv);
+ /* need space for one extra char (NOTE: SvCUR() not set here) */
+ d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
+ d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
+ }
+ else {
+ while (len--)
+ *d++ = *s++;
+ }
+ has_utf8 = TRUE;
+ if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+ PL_sublex_info.sub_op->op_private |=
+ (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ utf = TRUE;
+ }
+ continue;
+ }
+
+ *d++ = *s++;
} /* while loop to process each character */
/* terminate the string and set up the sv */
*d = '\0';
SvCUR_set(sv, d - SvPVX(sv));
SvPOK_on(sv);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
/* shrink the sv if we allocated more than we used */
@@ -1573,9 +1607,9 @@ S_scan_const(pTHX_ char *start)
/* return the substring (via yylval) only if we parsed anything */
if (s > PL_bufptr) {
if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
- sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
+ sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
sv, Nullsv,
- ( PL_lex_inwhat == OP_TRANS
+ ( PL_lex_inwhat == OP_TRANS
? "tr"
: ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
? "s"
@@ -1846,7 +1880,7 @@ S_incl_perldb(pTHX)
/* Encoded script support. filter_add() effectively inserts a
- * 'pre-processing' function into the current source input stream.
+ * 'pre-processing' function into the current source input stream.
* Note that the filter function only applies to the current source file
* (e.g., it will not affect files 'require'd or 'use'd by this one).
*
@@ -1856,7 +1890,7 @@ S_incl_perldb(pTHX)
* store private buffers and state information.
*
* The supplied datasv parameter is upgraded to a PVIO type
- * and the IoDIRP field is used to store the function pointer,
+ * and the IoDIRP/IoANY field is used to store the function pointer,
* and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
* Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
* private use must be set using malloc'd pointers.
@@ -1874,7 +1908,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
datasv = NEWSV(255,0);
if (!SvUPGRADE(datasv, SVt_PVIO))
Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
- IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
+ IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
IoFLAGS(datasv) |= IOf_FAKE_DIRP;
DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
funcp, SvPV_nolen(datasv)));
@@ -1882,7 +1916,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
av_store(PL_rsfp_filters, 0, datasv) ;
return(datasv);
}
-
+
/* Delete most recently added instance of this filter function. */
void
@@ -1894,9 +1928,9 @@ Perl_filter_del(pTHX_ filter_t funcp)
return;
/* if filter is on top of stack (usual case) just pop it off */
datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
- if (IoDIRP(datasv) == (DIR*)funcp) {
+ if (IoANY(datasv) == (void *)funcp) {
IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
- IoDIRP(datasv) = (DIR*)NULL;
+ IoANY(datasv) = (void *)NULL;
sv_free(av_pop(PL_rsfp_filters));
return;
@@ -1909,8 +1943,8 @@ Perl_filter_del(pTHX_ filter_t funcp)
/* Invoke the n'th filter function for the current rsfp. */
I32
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
+
+
/* 0 = read one text line */
{
filter_t funcp;
@@ -1923,7 +1957,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
/* Note that we append to the line. This is handy. */
DEBUG_P(PerlIO_printf(Perl_debug_log,
"filter_read %d: from rsfp\n", idx));
- if (maxlen) {
+ if (maxlen) {
/* Want a block */
int len ;
int old_len = SvCUR(buf_sv) ;
@@ -1956,7 +1990,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
}
/* Get function pointer hidden within datasv */
- funcp = (filter_t)IoDIRP(datasv);
+ funcp = (filter_t)IoANY(datasv);
DEBUG_P(PerlIO_printf(Perl_debug_log,
"filter_read %d: via function %p (%s)\n",
idx, funcp, SvPV_nolen(datasv)));
@@ -1987,6 +2021,31 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
return (sv_gets(sv, fp, append));
}
+STATIC HV *
+S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
+{
+ GV *gv;
+
+ if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
+ return PL_curstash;
+
+ if (len > 2 &&
+ (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
+ (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
+ {
+ return GvHV(gv); /* Foo:: */
+ }
+
+ /* use constant CLASS => 'MyClass' */
+ if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
+ SV *sv;
+ if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
+ pkgname = SvPV_nolen(sv);
+ }
+ }
+
+ return gv_stashpv(pkgname, FALSE);
+}
#ifdef DEBUGGING
static char* exp_name[] =
@@ -2020,25 +2079,39 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
if we already built the token before, use it.
*/
-int
#ifdef USE_PURE_BISON
-Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
-#else
-Perl_yylex(pTHX)
+int
+Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
+{
+ int r;
+
+ yyactlevel++;
+ yylval_pointer[yyactlevel] = lvalp;
+ yychar_pointer[yyactlevel] = lcharp;
+ if (yyactlevel >= YYMAXLEVEL)
+ Perl_croak(aTHX_ "panic: YYMAXLEVEL");
+
+ r = Perl_yylex(aTHX);
+
+ yyactlevel--;
+
+ return r;
+}
+#endif
+
+#ifdef __SC__
+#pragma segment Perl_yylex
#endif
+int
+Perl_yylex(pTHX)
{
- dTHR;
register char *s;
register char *d;
register I32 tmp;
STRLEN len;
GV *gv = Nullgv;
GV **gvp = 0;
-
-#ifdef USE_PURE_BISON
- yylval_pointer = lvalp;
- yychar_pointer = lcharp;
-#endif
+ bool bof = FALSE;
/* check if there's an identifier for us to look at */
if (PL_pending_ident) {
@@ -2046,6 +2119,9 @@ Perl_yylex(pTHX)
char pit = PL_pending_ident;
PL_pending_ident = 0;
+ DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
+
/* if we're in a my(), we can't allow dynamics here.
$foo'bar has already been turned into $foo::bar, so
just check for colons.
@@ -2070,7 +2146,7 @@ Perl_yylex(pTHX)
}
}
- /*
+ /*
build the ops for accesses to a my() variable.
Deny my($a) or my($b) in a sort block, *if* $a or $b is
@@ -2145,9 +2221,14 @@ Perl_yylex(pTHX)
*/
if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
- if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
- yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
- PL_tokenbuf, PL_tokenbuf));
+ if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
+ && ckWARN(WARN_AMBIGUOUS))
+ {
+ /* Downgraded from fatal to warning 20000522 mjd */
+ Perl_warner(aTHX_ WARN_AMBIGUOUS,
+ "Possible unintended interpolation of %s in string",
+ PL_tokenbuf);
+ }
}
/* build ops for a bareword */
@@ -2178,6 +2259,10 @@ Perl_yylex(pTHX)
PL_expect = PL_lex_expect;
PL_lex_defer = LEX_NORMAL;
}
+ DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
+ (IV)PL_nexttype[PL_nexttoke]); })
+
return(PL_nexttype[PL_nexttoke]);
/* interpolated case modifiers like \L \U, including \Q and \E.
@@ -2209,6 +2294,8 @@ Perl_yylex(pTHX)
return yylex();
}
else {
+ DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ "### Saw case modifier at '%s'\n", PL_bufptr); })
s = PL_bufptr + 1;
if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
@@ -2259,6 +2346,8 @@ Perl_yylex(pTHX)
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
return sublex_done();
+ DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ "### Interpolated variable at '%s'\n", PL_bufptr); })
PL_expect = XTERM;
PL_lex_dojoin = (*PL_bufptr == '@');
PL_lex_state = LEX_INTERPNORMAL;
@@ -2355,7 +2444,7 @@ Perl_yylex(pTHX)
s = PL_bufptr;
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
- DEBUG_p( {
+ DEBUG_T( {
PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
exp_name[PL_expect], s);
} )
@@ -2375,6 +2464,9 @@ Perl_yylex(pTHX)
PL_last_lop = 0;
if (PL_lex_brackets)
yyerror("Missing right curly or square bracket");
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Tokener got EOF\n");
+ } )
TOKEN(0);
}
if (s++ < PL_bufend)
@@ -2430,6 +2522,7 @@ Perl_yylex(pTHX)
sv_catpv(PL_linestr, "\n");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (PERLDB_LINE && PL_curstash != PL_debstash) {
SV *sv = NEWSV(85,0);
@@ -2440,6 +2533,7 @@ Perl_yylex(pTHX)
goto retry;
}
do {
+ bof = PL_rsfp ? TRUE : FALSE;
if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
fake_eof:
if (PL_rsfp) {
@@ -2457,13 +2551,45 @@ Perl_yylex(pTHX)
sv_catpv(PL_linestr,";}");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
PL_minus_n = PL_minus_p = 0;
goto retry;
}
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
sv_setpv(PL_linestr,"");
TOKEN(';'); /* not infinite loop because rsfp is NULL now */
}
+ /* if it looks like the start of a BOM, check if it in fact is */
+ else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+#ifdef PERLIO_IS_STDIO
+# ifdef __GNU_LIBRARY__
+# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
+# define FTELL_FOR_PIPE_IS_BROKEN
+# endif
+# else
+# ifdef __GLIBC__
+# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
+# define FTELL_FOR_PIPE_IS_BROKEN
+# endif
+# endif
+# endif
+#endif
+#ifdef FTELL_FOR_PIPE_IS_BROKEN
+ /* This loses the possibility to detect the bof
+ * situation on perl -P when the libc5 is being used.
+ * Workaround? Maybe attach some extra state to PL_rsfp?
+ */
+ if (!PL_preprocess)
+ bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
+#else
+ bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
+#endif
+ if (bof) {
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ s = swallow_bom((U8*)s);
+ }
+ }
if (PL_doextract) {
if (*s == '#' && s[1] == '!' && instr(s,"perl"))
PL_doextract = FALSE;
@@ -2473,6 +2599,7 @@ Perl_yylex(pTHX)
sv_setpv(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
PL_doextract = FALSE;
}
}
@@ -2487,6 +2614,7 @@ Perl_yylex(pTHX)
av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
}
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (CopLINE(PL_curcop) == 1) {
while (s < PL_bufend && isSPACE(*s))
s++;
@@ -2575,6 +2703,7 @@ Perl_yylex(pTHX)
*s = '#'; /* Don't try to parse shebang line */
}
#endif /* ALTERNATE_SHEBANG */
+#ifndef MACOS_TRADITIONAL
if (!d &&
*s == '#' &&
ipathend > ipath &&
@@ -2599,16 +2728,17 @@ Perl_yylex(pTHX)
else
newargv = PL_origargv;
newargv[0] = ipath;
- PerlProc_execv(ipath, newargv);
+ PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
Perl_croak(aTHX_ "Can't exec %s", ipath);
}
+#endif
if (d) {
U32 oldpdb = PL_perldb;
bool oldn = PL_minus_n;
bool oldp = PL_minus_p;
while (*d && !isSPACE(*d)) d++;
- while (*d == ' ' || *d == '\t') d++;
+ while (SPACE_OR_TAB(*d)) d++;
if (*d++ == '-') {
do {
@@ -2628,6 +2758,7 @@ Perl_yylex(pTHX)
sv_setpv(PL_linestr, "");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
PL_preambled = FALSE;
if (PERLDB_LINE)
(void)gv_fetchfile(PL_origfilename);
@@ -2646,15 +2777,23 @@ Perl_yylex(pTHX)
case '\r':
#ifdef PERL_STRICT_CR
Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
- Perl_croak(aTHX_
+ Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
case ' ': case '\t': case '\f': case 013:
+#ifdef MACOS_TRADITIONAL
+ case '\312':
+#endif
s++;
goto retry;
case '#':
case '\n':
if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
+ if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
+ /* handle eval qq[#line 1 "foo"\n ...] */
+ CopLINE_dec(PL_curcop);
+ incline(s);
+ }
d = PL_bufend;
while (s < d && *s != '\n')
s++;
@@ -2674,51 +2813,66 @@ Perl_yylex(pTHX)
goto retry;
case '-':
if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
+ I32 ftst = 0;
+
s++;
PL_bufptr = s;
tmp = *s++;
- while (s < PL_bufend && (*s == ' ' || *s == '\t'))
+ while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
if (strnEQ(s,"=>",2)) {
s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw unary minus before =>, forcing word '%s'\n", s);
+ } )
OPERATOR('-'); /* unary minus */
}
PL_last_uni = PL_oldbufptr;
- PL_last_lop_op = OP_FTEREAD; /* good enough */
switch (tmp) {
- case 'r': FTST(OP_FTEREAD);
- case 'w': FTST(OP_FTEWRITE);
- case 'x': FTST(OP_FTEEXEC);
- case 'o': FTST(OP_FTEOWNED);
- case 'R': FTST(OP_FTRREAD);
- case 'W': FTST(OP_FTRWRITE);
- case 'X': FTST(OP_FTREXEC);
- case 'O': FTST(OP_FTROWNED);
- case 'e': FTST(OP_FTIS);
- case 'z': FTST(OP_FTZERO);
- case 's': FTST(OP_FTSIZE);
- case 'f': FTST(OP_FTFILE);
- case 'd': FTST(OP_FTDIR);
- case 'l': FTST(OP_FTLINK);
- case 'p': FTST(OP_FTPIPE);
- case 'S': FTST(OP_FTSOCK);
- case 'u': FTST(OP_FTSUID);
- case 'g': FTST(OP_FTSGID);
- case 'k': FTST(OP_FTSVTX);
- case 'b': FTST(OP_FTBLK);
- case 'c': FTST(OP_FTCHR);
- case 't': FTST(OP_FTTTY);
- case 'T': FTST(OP_FTTEXT);
- case 'B': FTST(OP_FTBINARY);
- case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
- case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
- case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
+ case 'r': ftst = OP_FTEREAD; break;
+ case 'w': ftst = OP_FTEWRITE; break;
+ case 'x': ftst = OP_FTEEXEC; break;
+ case 'o': ftst = OP_FTEOWNED; break;
+ case 'R': ftst = OP_FTRREAD; break;
+ case 'W': ftst = OP_FTRWRITE; break;
+ case 'X': ftst = OP_FTREXEC; break;
+ case 'O': ftst = OP_FTROWNED; break;
+ case 'e': ftst = OP_FTIS; break;
+ case 'z': ftst = OP_FTZERO; break;
+ case 's': ftst = OP_FTSIZE; break;
+ case 'f': ftst = OP_FTFILE; break;
+ case 'd': ftst = OP_FTDIR; break;
+ case 'l': ftst = OP_FTLINK; break;
+ case 'p': ftst = OP_FTPIPE; break;
+ case 'S': ftst = OP_FTSOCK; break;
+ case 'u': ftst = OP_FTSUID; break;
+ case 'g': ftst = OP_FTSGID; break;
+ case 'k': ftst = OP_FTSVTX; break;
+ case 'b': ftst = OP_FTBLK; break;
+ case 'c': ftst = OP_FTCHR; break;
+ case 't': ftst = OP_FTTTY; break;
+ case 'T': ftst = OP_FTTEXT; break;
+ case 'B': ftst = OP_FTBINARY; break;
+ case 'M': case 'A': case 'C':
+ gv_fetchpv("\024",TRUE, SVt_PV);
+ switch (tmp) {
+ case 'M': ftst = OP_FTMTIME; break;
+ case 'A': ftst = OP_FTATIME; break;
+ case 'C': ftst = OP_FTCTIME; break;
+ default: break;
+ }
+ break;
default:
Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
break;
}
+ PL_last_lop_op = ftst;
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw file test %c\n", (int)ftst);
+ } )
+ FTST(ftst);
}
tmp = *s++;
if (*s == tmp) {
@@ -2847,10 +3001,6 @@ Perl_yylex(pTHX)
if (*d == '(') {
d = scan_str(d,TRUE,TRUE);
if (!d) {
- if (PL_lex_stuff) {
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- }
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
*/
@@ -2870,9 +3020,21 @@ Perl_yylex(pTHX)
PL_lex_stuff = Nullsv;
}
else {
- attrs = append_elem(OP_LIST, attrs,
- newSVOP(OP_CONST, 0,
- newSVpvn(s, len)));
+ if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
+ CvLVALUE_on(PL_compcv);
+ else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
+ CvLOCKED_on(PL_compcv);
+ else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
+ CvMETHOD_on(PL_compcv);
+ /* After we've set the flags, it could be argued that
+ we don't need to do the attributes.pm-based setting
+ process, and shouldn't bother appending recognized
+ flags. To experiment with that, uncomment the
+ following "else": */
+ /* else */
+ attrs = append_elem(OP_LIST, attrs,
+ newSVOP(OP_CONST, 0,
+ newSVpvn(s, len)));
}
s = skipspace(d);
if (*s == ':' && s[1] != ':')
@@ -2917,8 +3079,7 @@ Perl_yylex(pTHX)
PL_expect = XTERM;
TOKEN('(');
case ';':
- if (CopLINE(PL_curcop) < PL_copline)
- PL_copline = CopLINE(PL_curcop);
+ CLINE;
tmp = *s++;
OPERATOR(tmp);
case ')':
@@ -2962,24 +3123,27 @@ Perl_yylex(pTHX)
PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
OPERATOR(HASHBRACK);
case XOPERATOR:
- while (s < PL_bufend && (*s == ' ' || *s == '\t'))
+ while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
d = s;
PL_tokenbuf[0] = '\0';
if (d < PL_bufend && *d == '-') {
PL_tokenbuf[0] = '-';
d++;
- while (d < PL_bufend && (*d == ' ' || *d == '\t'))
+ while (d < PL_bufend && SPACE_OR_TAB(*d))
d++;
}
if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
FALSE, &len);
- while (d < PL_bufend && (*d == ' ' || *d == '\t'))
+ while (d < PL_bufend && SPACE_OR_TAB(*d))
d++;
if (*d == '}') {
char minus = (PL_tokenbuf[0] == '-');
s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
+ PL_nextval[PL_nexttoke-1].opval)
+ SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
if (minus)
force_next('-');
}
@@ -3095,7 +3259,7 @@ Perl_yylex(pTHX)
yyerror("Unmatched right curly bracket");
else
PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
- if (PL_lex_brackets < PL_lex_formbrack)
+ if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
PL_lex_formbrack = 0;
if (PL_lex_state == LEX_INTERPNORMAL) {
if (PL_lex_brackets == 0) {
@@ -3192,9 +3356,9 @@ Perl_yylex(pTHX)
if (PL_lex_brackets < PL_lex_formbrack) {
char *t;
#ifdef PERL_STRICT_CR
- for (t = s; *t == ' ' || *t == '\t'; t++) ;
+ for (t = s; SPACE_OR_TAB(*t); t++) ;
#else
- for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+ for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
#endif
if (*t == '\n' || *t == '#') {
s--;
@@ -3417,8 +3581,8 @@ Perl_yylex(pTHX)
case '?': /* may either be conditional or pattern */
if (PL_expect != XOPERATOR) {
/* Disable warning on "study /blah/" */
- if (PL_oldoldbufptr == PL_last_uni
- && (*PL_last_uni != 's' || s - PL_last_uni < 5
+ if (PL_oldoldbufptr == PL_last_uni
+ && (*PL_last_uni != 's' || s - PL_last_uni < 5
|| memNE(PL_last_uni, "study", 5)
|| isALNUM_lazy_if(PL_last_uni+5,UTF)))
check_uni();
@@ -3462,13 +3626,19 @@ Perl_yylex(pTHX)
/* FALL THROUGH */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- s = scan_num(s);
+ s = scan_num(s, &yylval);
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw number in '%s'\n", s);
+ } )
if (PL_expect == XOPERATOR)
no_op("Number",s);
TERM(THING);
case '\'':
s = scan_str(s,FALSE,FALSE);
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw string before '%s'\n", s);
+ } )
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
@@ -3485,6 +3655,9 @@ Perl_yylex(pTHX)
case '"':
s = scan_str(s,FALSE,FALSE);
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw string before '%s'\n", s);
+ } )
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
@@ -3498,7 +3671,7 @@ Perl_yylex(pTHX)
missingterm((char*)0);
yylval.ival = OP_CONST;
for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
- if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
+ if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) {
yylval.ival = OP_STRINGIFY;
break;
}
@@ -3507,6 +3680,9 @@ Perl_yylex(pTHX)
case '`':
s = scan_str(s,FALSE,FALSE);
+ DEBUG_T( { PerlIO_printf(Perl_debug_log,
+ "### Saw backtick string before '%s'\n", s);
+ } )
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
if (!s)
@@ -3532,7 +3708,7 @@ Perl_yylex(pTHX)
while (isDIGIT(*start) || *start == '_')
start++;
if (*start == '.' && isDIGIT(start[1])) {
- s = scan_num(s);
+ s = scan_num(s, &yylval);
TERM(THING);
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
@@ -3543,7 +3719,7 @@ Perl_yylex(pTHX)
gv = gv_fetchpv(s, FALSE, SVt_PVCV);
*start = c;
if (!gv) {
- s = scan_num(s);
+ s = scan_num(s, &yylval);
TERM(THING);
}
}
@@ -3618,10 +3794,12 @@ Perl_yylex(pTHX)
tmp = keyword(PL_tokenbuf, len);
/* Is this a word before a => operator? */
- if (strnEQ(d,"=>",2)) {
+ if (*d == '=' && d[1] == '>') {
CLINE;
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
yylval.opval->op_private = OPpCONST_BARE;
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
@@ -3706,7 +3884,7 @@ Perl_yylex(pTHX)
PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
{
if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
- Perl_warner(aTHX_ WARN_BAREWORD,
+ Perl_warner(aTHX_ WARN_BAREWORD,
"Bareword \"%s\" refers to nonexistent package",
PL_tokenbuf);
len -= 2;
@@ -3773,14 +3951,24 @@ Perl_yylex(pTHX)
}
}
- /* If followed by a paren, it's certainly a subroutine. */
PL_expect = XOPERATOR;
s = skipspace(s);
+
+ /* Is this a word before a => operator? */
+ if (*s == '=' && s[1] == '>') {
+ CLINE;
+ sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
+ TERM(WORD);
+ }
+
+ /* If followed by a paren, it's certainly a subroutine. */
if (*s == '(') {
CLINE;
if (gv && GvCVu(gv)) {
- for (d = s + 1; *d == ' ' || *d == '\t'; d++) ;
+ for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
s = d + 1;
goto its_constant;
@@ -3917,11 +4105,11 @@ Perl_yylex(pTHX)
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
if (PL_preprocess)
- IoTYPE(GvIOp(gv)) = '|';
+ IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
- IoTYPE(GvIOp(gv)) = '-';
+ IoTYPE(GvIOp(gv)) = IoTYPE_STD;
else
- IoTYPE(GvIOp(gv)) = '<';
+ IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
/* if the script was opened in binmode, we need to revert
* it to text mode for compatibility; but only iff it has CRs
@@ -3930,14 +4118,14 @@ Perl_yylex(pTHX)
&& PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
{
Off_t loc = 0;
- if (IoTYPE(GvIOp(gv)) == '<') {
+ if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
loc = PerlIO_tell(PL_rsfp);
(void)PerlIO_seek(PL_rsfp, 0L, 0);
}
if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
#if defined(__BORLANDC__)
/* XXX see note in do_binmode() */
- ((FILE*)PL_rsfp)->flags |= _F_BIN;
+ ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
#endif
if (loc > 0)
PerlIO_seek(PL_rsfp, loc, 0);
@@ -4094,7 +4282,7 @@ Perl_yylex(pTHX)
case KEY_exists:
UNI(OP_EXISTS);
-
+
case KEY_exit:
UNI(OP_EXIT);
@@ -4298,7 +4486,7 @@ Perl_yylex(pTHX)
case KEY_last:
s = force_word(s,WORD,TRUE,FALSE,FALSE);
LOOPX(OP_LAST);
-
+
case KEY_lc:
UNI(OP_LC);
@@ -4366,7 +4554,7 @@ Perl_yylex(pTHX)
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
goto really_sub;
- PL_in_my_stash = gv_stashpv(PL_tokenbuf, FALSE);
+ PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
PL_bufptr = s;
@@ -4443,7 +4631,7 @@ Perl_yylex(pTHX)
case KEY_pos:
UNI(OP_POS);
-
+
case KEY_pack:
LOP(OP_PACK,XTERM);
@@ -4474,6 +4662,7 @@ Perl_yylex(pTHX)
int warned = 0;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
+ SV *sv;
for (; isSPACE(*d) && len; --len, ++d) ;
if (len) {
char *b = d;
@@ -4494,8 +4683,11 @@ Perl_yylex(pTHX)
else {
for (; !isSPACE(*d) && len; --len, ++d) ;
}
+ sv = newSVpvn(b, d-b);
+ if (DO_UTF8(PL_lex_stuff))
+ SvUTF8_on(sv);
words = append_elem(OP_LIST, words,
- newSVOP(OP_CONST, 0, newSVpvn(b, d-b)));
+ newSVOP(OP_CONST, 0, tokeq(sv)));
}
}
if (words) {
@@ -4503,9 +4695,10 @@ Perl_yylex(pTHX)
force_next(THING);
}
}
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
PL_expect = XTERM;
TOKEN('(');
@@ -4605,7 +4798,7 @@ Perl_yylex(pTHX)
case KEY_chomp:
UNI(OP_CHOMP);
-
+
case KEY_scalar:
UNI(OP_SCALAR);
@@ -4773,12 +4966,8 @@ Perl_yylex(pTHX)
char *p;
s = scan_str(s,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
- }
/* strip spaces */
d = SvPVX(PL_lex_stuff);
tmp = 0;
@@ -4894,7 +5083,7 @@ Perl_yylex(pTHX)
case KEY_umask:
if (ckWARN(WARN_UMASK)) {
for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
- if (*d != '0' && isDIGIT(*d))
+ if (*d != '0' && isDIGIT(*d))
Perl_warner(aTHX_ WARN_UMASK,
"umask: argument is missing initial 0");
}
@@ -4949,7 +5138,7 @@ Perl_yylex(pTHX)
{
static char ctl_l[2];
- if (ctl_l[0] == '\0')
+ if (ctl_l[0] == '\0')
ctl_l[0] = toCTRL('L');
gv_fetchpv(ctl_l,TRUE, SVt_PV);
}
@@ -4974,6 +5163,9 @@ Perl_yylex(pTHX)
}
}}
}
+#ifdef __SC__
+#pragma segment Main
+#endif
I32
Perl_keyword(pTHX_ register char *d, I32 len)
@@ -5026,12 +5218,12 @@ Perl_keyword(pTHX_ register char *d, I32 len)
if (strEQ(d,"cos")) return -KEY_cos;
break;
case 4:
- if (strEQ(d,"chop")) return KEY_chop;
+ if (strEQ(d,"chop")) return -KEY_chop;
break;
case 5:
if (strEQ(d,"close")) return -KEY_close;
if (strEQ(d,"chdir")) return -KEY_chdir;
- if (strEQ(d,"chomp")) return KEY_chomp;
+ if (strEQ(d,"chomp")) return -KEY_chomp;
if (strEQ(d,"chmod")) return -KEY_chmod;
if (strEQ(d,"chown")) return -KEY_chown;
if (strEQ(d,"crypt")) return -KEY_crypt;
@@ -5093,7 +5285,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
if (strEQ(d,"exit")) return -KEY_exit;
if (strEQ(d,"eval")) return KEY_eval;
if (strEQ(d,"exec")) return -KEY_exec;
- if (strEQ(d,"each")) return KEY_each;
+ if (strEQ(d,"each")) return -KEY_each;
break;
case 5:
if (strEQ(d,"elsif")) return KEY_elsif;
@@ -5243,7 +5435,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
break;
case 'k':
if (len == 4) {
- if (strEQ(d,"keys")) return KEY_keys;
+ if (strEQ(d,"keys")) return -KEY_keys;
if (strEQ(d,"kill")) return -KEY_kill;
}
break;
@@ -5334,11 +5526,11 @@ Perl_keyword(pTHX_ register char *d, I32 len)
case 'p':
switch (len) {
case 3:
- if (strEQ(d,"pop")) return KEY_pop;
+ if (strEQ(d,"pop")) return -KEY_pop;
if (strEQ(d,"pos")) return KEY_pos;
break;
case 4:
- if (strEQ(d,"push")) return KEY_push;
+ if (strEQ(d,"push")) return -KEY_push;
if (strEQ(d,"pack")) return -KEY_pack;
if (strEQ(d,"pipe")) return -KEY_pipe;
break;
@@ -5445,7 +5637,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
case 'h':
switch (len) {
case 5:
- if (strEQ(d,"shift")) return KEY_shift;
+ if (strEQ(d,"shift")) return -KEY_shift;
break;
case 6:
if (strEQ(d,"shmctl")) return -KEY_shmctl;
@@ -5474,7 +5666,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
case 'p':
if (strEQ(d,"split")) return KEY_split;
if (strEQ(d,"sprintf")) return -KEY_sprintf;
- if (strEQ(d,"splice")) return KEY_splice;
+ if (strEQ(d,"splice")) return -KEY_splice;
break;
case 'q':
if (strEQ(d,"sqrt")) return -KEY_sqrt;
@@ -5554,7 +5746,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
if (strEQ(d,"unlink")) return -KEY_unlink;
break;
case 7:
- if (strEQ(d,"unshift")) return KEY_unshift;
+ if (strEQ(d,"unshift")) return -KEY_unshift;
if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
break;
}
@@ -5600,7 +5792,6 @@ S_checkcomma(pTHX_ register char *s, char *name, char *what)
char *w;
if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
- dTHR; /* only for ckWARN */
if (ckWARN(WARN_SYNTAX)) {
int level = 1;
for (w = s+2; *w && level; w++) {
@@ -5655,18 +5846,27 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
SV **cvp;
SV *cv, *typesv;
const char *why1, *why2, *why3;
-
+
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
SV *msg;
- why1 = "%^H is not consistent";
why2 = strEQ(key,"charnames")
- ? " (missing \"use charnames ...\"?)"
+ ? "(possibly a missing \"use charnames ...\")"
: "";
- why3 = "";
+ msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
+ (type ? type: "undef"), why2);
+
+ /* This is convoluted and evil ("goto considered harmful")
+ * but I do not understand the intricacies of all the different
+ * failure modes of %^H in here. The goal here is to make
+ * the most probable error message user-friendly. --jhi */
+
+ goto msgdone;
+
report:
- msg = Perl_newSVpvf(aTHX_ "constant(%s): %s%s%s",
+ msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
(type ? type: "undef"), why1, why2, why3);
+ msgdone:
yyerror(SvPVX(msg));
SvREFCNT_dec(msg);
return sv;
@@ -5686,24 +5886,23 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
typesv = sv_2mortal(newSVpv(type, 0));
else
typesv = &PL_sv_undef;
-
+
PUSHSTACKi(PERLSI_OVERLOAD);
ENTER ;
SAVETMPS;
-
+
PUSHMARK(SP) ;
- EXTEND(sp, 4);
+ EXTEND(sp, 3);
if (pv)
PUSHs(pv);
PUSHs(sv);
if (pv)
PUSHs(typesv);
- PUSHs(cv);
PUTBACK;
call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
-
+
SPAGAIN ;
-
+
/* Check the eval first */
if (!PL_in_eval && SvTRUE(ERRSV)) {
STRLEN n_a;
@@ -5716,12 +5915,12 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
res = POPs;
(void)SvREFCNT_inc(res);
}
-
+
PUTBACK ;
FREETMPS ;
LEAVE ;
POPSTACK;
-
+
if (!SvOK(res)) {
why1 = "Call to &{$^H{";
why2 = key;
@@ -5732,7 +5931,7 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
return res;
}
-
+
STATIC char *
S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
@@ -5752,9 +5951,9 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
@@ -5804,9 +6003,9 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
@@ -5847,7 +6046,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
if (isSPACE(s[-1])) {
while (s < send) {
char ch = *s++;
- if (ch != ' ' && ch != '\t') {
+ if (!SPACE_OR_TAB(ch)) {
*d = ch;
break;
}
@@ -5859,7 +6058,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
e = s;
while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
e += UTF8SKIP(e);
- while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
+ while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
e += UTF8SKIP(e);
}
Copy(s, d, e - s, char);
@@ -5873,9 +6072,8 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
Perl_croak(aTHX_ ident_too_long);
}
*d = '\0';
- while (s < send && (*s == ' ' || *s == '\t')) s++;
+ while (s < send && SPACE_OR_TAB(*s)) s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
- dTHR; /* only for ckWARN */
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
const char *brack = *s == '[' ? "[...]" : "{...}";
Perl_warner(aTHX_ WARN_AMBIGUOUS,
@@ -5886,8 +6084,8 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
return s;
}
- }
- /* Handle extended ${^Foo} variables
+ }
+ /* Handle extended ${^Foo} variables
* 1999-02-27 mjd-perl-patch@plover.com */
else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
&& isALNUM(*s))
@@ -5907,7 +6105,6 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
if (funny == '#')
funny = '@';
if (PL_lex_state == LEX_NORMAL) {
- dTHR; /* only for ckWARN */
if (ckWARN(WARN_AMBIGUOUS) &&
(keyword(dest, d - dest) || get_cv(dest, FALSE)))
{
@@ -5953,12 +6150,8 @@ S_scan_pat(pTHX_ char *start, I32 type)
char *s;
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Search pattern not terminated");
- }
pm = (PMOP*)newPMOP(type, 0);
if (PL_multi_open == '?')
@@ -5990,12 +6183,8 @@ S_scan_subst(pTHX_ char *start)
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
- }
if (s[-1] == PL_multi_open)
s--;
@@ -6003,12 +6192,10 @@ S_scan_subst(pTHX_ char *start)
first_start = PL_multi_start;
s = scan_str(s,FALSE,FALSE);
if (!s) {
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- if (PL_lex_repl)
- SvREFCNT_dec(PL_lex_repl);
- PL_lex_repl = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
Perl_croak(aTHX_ "Substitution replacement not terminated");
}
PL_multi_start = first_start; /* so whole substitution is taken together */
@@ -6063,65 +6250,36 @@ S_scan_trans(pTHX_ char *start)
yylval.ival = OP_NULL;
s = scan_str(start,FALSE,FALSE);
- if (!s) {
- if (PL_lex_stuff)
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
+ if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
- }
if (s[-1] == PL_multi_open)
s--;
s = scan_str(s,FALSE,FALSE);
if (!s) {
- if (PL_lex_stuff)
+ if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = Nullsv;
- if (PL_lex_repl)
- SvREFCNT_dec(PL_lex_repl);
- PL_lex_repl = Nullsv;
+ PL_lex_stuff = Nullsv;
+ }
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
- if (UTF) {
- o = newSVOP(OP_TRANS, 0, 0);
- utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF;
- }
- else {
- New(803,tbl,256,short);
- o = newPVOP(OP_TRANS, 0, (char*)tbl);
- utf8 = 0;
- }
+ New(803,tbl,256,short);
+ o = newPVOP(OP_TRANS, 0, (char*)tbl);
complement = del = squash = 0;
- while (strchr("cdsCU", *s)) {
+ while (strchr("cds", *s)) {
if (*s == 'c')
complement = OPpTRANS_COMPLEMENT;
else if (*s == 'd')
del = OPpTRANS_DELETE;
else if (*s == 's')
squash = OPpTRANS_SQUASH;
- else {
- switch (count++) {
- case 0:
- if (*s == 'C')
- utf8 &= ~OPpTRANS_FROM_UTF;
- else
- utf8 |= OPpTRANS_FROM_UTF;
- break;
- case 1:
- if (*s == 'C')
- utf8 &= ~OPpTRANS_TO_UTF;
- else
- utf8 |= OPpTRANS_TO_UTF;
- break;
- default:
- Perl_croak(aTHX_ "Too many /C and /U options");
- }
- }
s++;
}
- o->op_private = del|squash|complement|utf8;
+ o->op_private = del|squash|complement|
+ (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
+ (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
yylval.ival = OP_TRANS;
@@ -6131,7 +6289,6 @@ S_scan_trans(pTHX_ char *start)
STATIC char *
S_scan_heredoc(pTHX_ register char *s)
{
- dTHR;
SV *herewas;
I32 op_type = OP_SCALAR;
I32 len;
@@ -6147,7 +6304,7 @@ S_scan_heredoc(pTHX_ register char *s)
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
if (!outer)
*d++ = '\n';
- for (peek = s; *peek == ' ' || *peek == '\t'; peek++) ;
+ for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
if (*peek && strchr("`'\"",*peek)) {
s = peek;
term = *s++;
@@ -6264,6 +6421,7 @@ S_scan_heredoc(pTHX_ register char *s)
sv_setsv(PL_linestr,herewas);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
}
else
sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
@@ -6275,6 +6433,7 @@ S_scan_heredoc(pTHX_ register char *s)
}
CopLINE_inc(PL_curcop);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
@@ -6316,6 +6475,8 @@ retval:
Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
}
SvREFCNT_dec(herewas);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+ SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
yylval.ival = op_type;
return s;
@@ -6466,31 +6627,30 @@ S_scan_inputsymbol(pTHX_ char *start)
calls scan_str(). s/// makes yylex() call scan_subst() which calls
scan_str(). tr/// and y/// make yylex() call scan_trans() which
calls scan_str().
-
+
It skips whitespace before the string starts, and treats the first
character as the delimiter. If the delimiter is one of ([{< then
the corresponding "close" character )]}> is used as the closing
delimiter. It allows quoting of delimiters, and if the string has
balanced delimiters ([{<>}]) it allows nesting.
- The lexer always reads these strings into lex_stuff, except in the
- case of the operators which take *two* arguments (s/// and tr///)
- when it checks to see if lex_stuff is full (presumably with the 1st
- arg to s or tr) and if so puts the string into lex_repl.
-
+ On success, the SV with the resulting string is put into lex_stuff or,
+ if that is already non-NULL, into lex_repl. The second case occurs only
+ when parsing the RHS of the special constructs s/// and tr/// (y///).
+ For convenience, the terminating delimiter character is stuffed into
+ SvIVX of the SV.
*/
STATIC char *
S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
{
- dTHR;
SV *sv; /* scalar value: string */
char *tmps; /* temp string, used for delimiter matching */
register char *s = start; /* current position in the buffer */
register char term; /* terminating character */
register char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf = FALSE; /* is there any utf8 content? */
+ bool has_utf8 = FALSE; /* is there any utf8 content? */
/* skip space before the delimiter */
if (isSPACE(*s))
@@ -6501,8 +6661,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
/* after skipping whitespace, the next character is the terminator */
term = *s;
- if ((term & 0x80) && UTF)
- has_utf = TRUE;
+ if (UTF8_IS_CONTINUED(term) && UTF)
+ has_utf8 = TRUE;
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
@@ -6548,8 +6708,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
have found the terminator */
else if (*s == term)
break;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
@@ -6577,8 +6737,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
break;
else if (*s == PL_multi_open)
brackets++;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
@@ -6632,13 +6792,14 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
/* having changed the buffer, we must update PL_bufend */
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
}
-
+
/* at this point, we have successfully read the delimited string */
if (keep_delims)
sv_catpvn(sv, s, 1);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
s++;
@@ -6652,7 +6813,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
/* decide whether this is the first or second quoted string we've read
for this op
*/
-
+
if (PL_lex_stuff)
PL_lex_repl = sv;
else
@@ -6681,14 +6842,14 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
try converting the number to an integer and see if it can do so
without loss of precision.
*/
-
+
char *
-Perl_scan_num(pTHX_ char *start)
+Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
{
register char *s = start; /* current position in buffer */
register char *d; /* destination in temp buffer */
register char *e; /* end of temp buffer */
- NV value; /* number read, as a double */
+ NV nv; /* number read, as a double */
SV *sv = Nullsv; /* place to put the converted number */
bool floatit; /* boolean: int or float? */
char *lastub = 0; /* position of last underbar */
@@ -6699,7 +6860,7 @@ Perl_scan_num(pTHX_ char *start)
switch (*s) {
default:
Perl_croak(aTHX_ "panic: scan_num");
-
+
/* if it starts with a 0, it could be an octal number, a decimal in
0.13 disguise, or a hexadecimal number, or a binary number. */
case '0':
@@ -6714,7 +6875,6 @@ Perl_scan_num(pTHX_ char *start)
we in octal/hex/binary?" indicator to disallow hex characters
when in octal mode.
*/
- dTHR;
NV n = 0.0;
UV u = 0;
I32 shift;
@@ -6802,7 +6962,6 @@ Perl_scan_num(pTHX_ char *start)
if ((x >> shift) != u
&& !(PL_hints & HINT_NEW_BINARY)) {
- dTHR;
overflowed = TRUE;
n = (NV) u;
if (ckWARN_d(WARN_OVERFLOW))
@@ -6834,7 +6993,6 @@ Perl_scan_num(pTHX_ char *start)
out:
sv = NEWSV(92,0);
if (overflowed) {
- dTHR;
if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
Perl_warner(aTHX_ WARN_PORTABLE,
"%s number > %s non-portable",
@@ -6843,7 +7001,6 @@ Perl_scan_num(pTHX_ char *start)
}
else {
#if UVSIZE > 4
- dTHR;
if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
Perl_warner(aTHX_ WARN_PORTABLE,
"%s number > %s non-portable",
@@ -6869,11 +7026,10 @@ Perl_scan_num(pTHX_ char *start)
/* read next group of digits and _ and copy into d */
while (isDIGIT(*s) || *s == '_') {
- /* skip underscores, checking for misplaced ones
+ /* skip underscores, checking for misplaced ones
if -w is on
*/
if (*s == '_') {
- dTHR; /* only for ckWARN */
if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
lastub = ++s;
@@ -6889,7 +7045,6 @@ Perl_scan_num(pTHX_ char *start)
/* final misplaced underbar check */
if (lastub && s - lastub != 3) {
- dTHR;
if (ckWARN(WARN_SYNTAX))
Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
}
@@ -6945,40 +7100,9 @@ Perl_scan_num(pTHX_ char *start)
/* make an sv from the string */
sv = NEWSV(92,0);
- /* unfortunately this monster needs to be on one line or
- makedepend will be confused. */
-#if (defined(USE_64_BIT_INT) && (!defined(HAS_STRTOLL)|| !defined(HAS_STRTOULL))) || (!defined(USE_64_BIT_INT) && (!defined(HAS_STRTOL) || !defined(HAS_STRTOUL)))
+#if defined(Strtol) && defined(Strtoul)
/*
- No working strto[u]l[l]. Since atoi() doesn't do range checks,
- we need to do this the hard way.
- */
-
- value = Atof(PL_tokenbuf);
-
- /*
- See if we can make do with an integer value without loss of
- precision. We use I_V to cast to an int, because some
- compilers have issues. Then we try casting it back and see
- if it was the same. We only do this if we know we
- specifically read an integer.
-
- Note: if floatit is true, then we don't need to do the
- conversion at all.
- */
- {
- UV tryuv = U_V(value);
- if (!floatit && (NV)tryuv == value) {
- if (tryuv <= IV_MAX)
- sv_setiv(sv, (IV)tryuv);
- else
- sv_setuv(sv, tryuv);
- }
- else
- sv_setnv(sv, value);
- }
-#else
- /*
strtol/strtoll sets errno to ERANGE if the number is too big
for an integer. We try to do an integer conversion first
if no characters indicating "float" have been found.
@@ -6993,20 +7117,71 @@ Perl_scan_num(pTHX_ char *start)
else
uv = Strtoul(PL_tokenbuf, (char**)NULL, 10);
if (errno)
- floatit = TRUE; /* probably just too large */
+ floatit = TRUE; /* Probably just too large. */
else if (*PL_tokenbuf == '-')
sv_setiv(sv, iv);
+ else if (uv <= IV_MAX)
+ sv_setiv(sv, uv); /* Prefer IVs over UVs. */
else
sv_setuv(sv, uv);
}
if (floatit) {
- value = Atof(PL_tokenbuf);
- sv_setnv(sv, value);
+ nv = Atof(PL_tokenbuf);
+ sv_setnv(sv, nv);
+ }
+#else
+ /*
+ No working strtou?ll?.
+
+ Unfortunately atol() doesn't do range checks (returning
+ LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows)
+ everywhere [1], so we cannot use use atol() (or atoll()).
+ If we could, they would be used, as Atol(), very much like
+ Strtol() and Strtoul() are used above.
+
+ [1] XXX Configure test needed to check for atol()
+ (and atoll()) overflow behaviour XXX
+
+ --jhi
+
+ We need to do this the hard way. */
+
+ nv = Atof(PL_tokenbuf);
+
+ /* See if we can make do with an integer value without loss of
+ precision. We use U_V to cast to a UV, because some
+ compilers have issues. Then we try casting it back and see
+ if it was the same [1]. We only do this if we know we
+ specifically read an integer. If floatit is true, then we
+ don't need to do the conversion at all.
+
+ [1] Note that this is lossy if our NVs cannot preserve our
+ UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean)
+ and NV_PRESERVES_UV_BITS (a number), but in general we really
+ do hope all such potentially lossy platforms have strtou?ll?
+ to do a lossless IV/UV conversion.
+
+ Maybe could do some tricks with DBL_DIG, LDBL_DIG and
+ DBL_MANT_DIG and LDBL_MANT_DIG (these are already available
+ as NV_DIG and NV_MANT_DIG)?
+
+ --jhi
+ */
+ {
+ UV uv = U_V(nv);
+ if (!floatit && (NV)uv == nv) {
+ if (uv <= IV_MAX)
+ sv_setiv(sv, uv); /* Prefer IVs over UVs. */
+ else
+ sv_setuv(sv, uv);
+ }
+ else
+ sv_setnv(sv, nv);
}
#endif
if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
(PL_hints & HINT_NEW_INTEGER) )
- sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
+ sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
(floatit ? "float" : "integer"),
sv, Nullsv, NULL);
break;
@@ -7021,7 +7196,7 @@ vstring:
pos++;
if (!isALPHA(*pos)) {
UV rev;
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tmpend;
bool utf8 = FALSE;
s++; /* get past 'v' */
@@ -7066,7 +7241,8 @@ vstring:
SvREADONLY_on(sv);
if (utf8) {
SvUTF8_on(sv);
- sv_utf8_downgrade(sv, TRUE);
+ if (!UTF||IN_BYTE)
+ sv_utf8_downgrade(sv, TRUE);
}
}
}
@@ -7076,9 +7252,9 @@ vstring:
/* make the op for the constant and return */
if (sv)
- yylval.opval = newSVOP(OP_CONST, 0, sv);
+ lvalp->opval = newSVOP(OP_CONST, 0, sv);
else
- yylval.opval = Nullop;
+ lvalp->opval = Nullop;
return s;
}
@@ -7086,19 +7262,18 @@ vstring:
STATIC char *
S_scan_formline(pTHX_ register char *s)
{
- dTHR;
register char *eol;
register char *t;
SV *stuff = newSVpvn("",0);
bool needargs = FALSE;
while (!needargs) {
- if (*s == '.' || *s == '}') {
+ if (*s == '.' || *s == /*{*/'}') {
/*SUPPRESS 530*/
#ifdef PERL_STRICT_CR
- for (t = s+1;*t == ' ' || *t == '\t'; t++) ;
+ for (t = s+1;SPACE_OR_TAB(*t); t++) ;
#else
- for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+ for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
#endif
if (*t == '\n' || t == PL_bufend)
break;
@@ -7134,6 +7309,7 @@ S_scan_formline(pTHX_ register char *s)
s = filter_gets(PL_linestr, PL_rsfp, 0);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend = PL_bufptr + SvCUR(PL_linestr);
+ PL_last_lop = PL_last_uni = Nullch;
if (!s) {
s = PL_bufptr;
yyerror("Format not terminated");
@@ -7177,7 +7353,6 @@ S_set_csh(pTHX)
I32
Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
{
- dTHR;
I32 oldsavestack_ix = PL_savestack_ix;
CV* outsidecv = PL_compcv;
AV* comppadlist;
@@ -7230,10 +7405,12 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
return oldsavestack_ix;
}
+#ifdef __SC__
+#pragma segment Perl_yylex
+#endif
int
Perl_yywarn(pTHX_ char *s)
{
- dTHR;
PL_in_eval |= EVAL_WARNONLY;
yyerror(s);
PL_in_eval &= ~EVAL_WARNONLY;
@@ -7243,7 +7420,6 @@ Perl_yywarn(pTHX_ char *s)
int
Perl_yyerror(pTHX_ char *s)
{
- dTHR;
char *where = NULL;
char *context = NULL;
int contlen = -1;
@@ -7310,7 +7486,7 @@ Perl_yyerror(pTHX_ char *s)
qerror(msg);
if (PL_error_count >= 10) {
if (PL_in_eval && SvCUR(ERRSV))
- Perl_croak(aTHX_ "%_%s has too many errors.\n",
+ Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
ERRSV, CopFILE(PL_curcop));
else
Perl_croak(aTHX_ "%s has too many errors.\n",
@@ -7320,7 +7496,86 @@ Perl_yyerror(pTHX_ char *s)
PL_in_my_stash = Nullhv;
return 0;
}
+#ifdef __SC__
+#pragma segment Main
+#endif
+
+STATIC char*
+S_swallow_bom(pTHX_ U8 *s)
+{
+ STRLEN slen;
+ slen = SvCUR(PL_linestr);
+ switch (*s) {
+ case 0xFF:
+ if (s[1] == 0xFE) {
+ /* UTF-16 little-endian */
+ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
+ Perl_croak(aTHX_ "Unsupported script encoding");
+#ifndef PERL_NO_UTF16_FILTER
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+ s += 2;
+ if (PL_bufend > (char*)s) {
+ U8 *news;
+ I32 newlen;
+
+ filter_add(utf16rev_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
+ Copy(news, s, newlen, U8);
+ SvCUR_set(PL_linestr, newlen);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
+ news[newlen++] = '\0';
+ Safefree(news);
+ }
+#else
+ Perl_croak(aTHX_ "Unsupported script encoding");
+#endif
+ }
+ break;
+
+ case 0xFE:
+ if (s[1] == 0xFF) { /* UTF-16 big-endian */
+#ifndef PERL_NO_UTF16_FILTER
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+ s += 2;
+ if (PL_bufend > (char *)s) {
+ U8 *news;
+ I32 newlen;
+
+ filter_add(utf16_textfilter, NULL);
+ New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
+ PL_bufend = (char*)utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
+ Copy(news, s, newlen, U8);
+ SvCUR_set(PL_linestr, newlen);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
+ news[newlen++] = '\0';
+ Safefree(news);
+ }
+#else
+ Perl_croak(aTHX_ "Unsupported script encoding");
+#endif
+ }
+ break;
+ case 0xEF:
+ if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
+ DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+ s += 3; /* UTF-8 */
+ }
+ break;
+ case 0:
+ if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
+ s[2] == 0xFE && s[3] == 0xFF)
+ {
+ Perl_croak(aTHX_ "Unsupported script encoding");
+ }
+ }
+ return (char*)s;
+}
#ifdef PERL_OBJECT
#include "XSUB.h"
@@ -7342,3 +7597,43 @@ restore_rsfp(pTHXo_ void *f)
PerlIO_close(PL_rsfp);
PL_rsfp = fp;
}
+
+#ifndef PERL_NO_UTF16_FILTER
+static I32
+utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ I32 newlen;
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ if (!*SvPV_nolen(sv))
+ /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
+ return count;
+
+ tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+ }
+ return count;
+}
+
+static I32
+utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
+{
+ I32 count = FILTER_READ(idx+1, sv, maxlen);
+ if (count) {
+ U8* tmps;
+ U8* tend;
+ I32 newlen;
+ if (!*SvPV_nolen(sv))
+ /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
+ return count;
+
+ New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+ tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
+ sv_usepvn(sv, (char*)tmps, tend - tmps);
+ }
+ return count;
+}
+#endif
OpenPOWER on IntegriCloud