Enlist the FreeBSD-CURRENT users as testers of what is to become GCC 3.1.0.

These bits are taken from the FSF anoncvs repo on 1-Feb-2002 08:20 PST.
author: obrien <obrien@FreeBSD.org> 2002-02-01 18:16:02 +0000
committer: obrien <obrien@FreeBSD.org> 2002-02-01 18:16:02 +0000
commit: c9ab9ae440a8066b2c2b85b157b1fdadcf09916a (patch)
tree: 086d9d6c8fbd4fc8fe4495059332f66bc0f8d12b /contrib/gcc/c-lex.c
parent: 2ecfd8bd04b63f335c1ec6295740a4bfd97a4fa6 (diff)
download: FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.zip
FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.tar.gz
1 files changed, 1192 insertions, 2108 deletions
diff --git a/contrib/gcc/c-lex.c b/contrib/gcc/c-lex.c
index 27c65f3..a0d2bbd 100644
--- a/contrib/gcc/c-lex.c
+++ b/contrib/gcc/c-lex.c
@@ -1,37 +1,43 @@
 /* Lexical analyzer for C and Objective C.
-   Copyright (C) 1987, 88, 89, 92, 94-97, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
+   1998, 1999, 2000 Free Software Foundation, Inc.
 
-This file is part of GNU CC.
+This file is part of GCC.
 
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
 
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
 
 You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
 
 #include "config.h"
 #include "system.h"
 
 #include "rtl.h"
 #include "tree.h"
+#include "expr.h"
 #include "input.h"
 #include "output.h"
 #include "c-lex.h"
 #include "c-tree.h"
 #include "flags.h"
-#include "c-parse.h"
+#include "timevar.h"
+#include "cpplib.h"
 #include "c-pragma.h"
 #include "toplev.h"
 #include "intl.h"
+#include "tm_p.h"
+#include "splay-tree.h"
+#include "debug.h"
 
 /* MULTIBYTE_CHARS support only works for native compilers.
    ??? Ideally what we want is to model widechar support after
@@ -44,1039 +50,642 @@ Boston, MA 02111-1307, USA.  */
 #include "mbchar.h"
 #include <locale.h>
 #endif /* MULTIBYTE_CHARS */
-
-#if USE_CPPLIB
-#include "cpplib.h"
-extern cpp_reader  parse_in;
-extern cpp_options parse_options;
-#else
-/* Stream for reading from the input file.  */
-FILE *finput;
+#ifndef GET_ENVIRONMENT
+#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
 #endif
 
-extern void yyprint			PROTO((FILE *, int, YYSTYPE));
+/* The current line map.  */
+static const struct line_map *map;
+
+/* The line used to refresh the lineno global variable after each token.  */
+static unsigned int src_lineno;
 
-/* The elements of `ridpointers' are identifier nodes
-   for the reserved type names and storage classes.
-   It is indexed by a RID_... value.  */
-tree ridpointers[(int) RID_MAX];
+/* We may keep statistics about how long each file took to compile.  */
+static int header_time, body_time;
+static splay_tree file_info_tree;
 
 /* Cause the `yydebug' variable to be defined.  */
 #define YYDEBUG 1
 
-#if USE_CPPLIB
-extern unsigned char *yy_cur, *yy_lim;
-
-extern int yy_get_token ();
-
-#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
-#define UNGETC(c) ((c) == EOF ? 0 : yy_cur--)
-#else
-#define GETC() getc (finput)
-#define UNGETC(c) ungetc (c, finput)
-#endif
-
-/* the declaration found for the last IDENTIFIER token read in.
-   yylex must look this up to detect typedefs, which get token type TYPENAME,
-   so it is left around in case the identifier is not a typedef but is
-   used in a context which makes it a reference to a variable.  */
-tree lastiddecl;
-
-/* Nonzero enables objc features.  */
-
-int doing_objc_thang;
-
-extern int yydebug;
-
 /* File used for outputting assembler code.  */
 extern FILE *asm_out_file;
 
-#ifndef WCHAR_TYPE_SIZE
-#ifdef INT_TYPE_SIZE
-#define WCHAR_TYPE_SIZE INT_TYPE_SIZE
-#else
-#define WCHAR_TYPE_SIZE	BITS_PER_WORD
-#endif
-#endif
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
 
 /* Number of bytes in a wide character.  */
 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
 
-static int maxtoken;		/* Current nominal length of token buffer.  */
-char *token_buffer;	/* Pointer to token buffer.
-			   Actual allocated length is maxtoken + 2.
-			   This is not static because objc-parse.y uses it.  */
-
-static int indent_level = 0;        /* Number of { minus number of }. */
-
-/* Nonzero if end-of-file has been seen on input.  */
-static int end_of_file;
-
-#if !USE_CPPLIB
-/* Buffered-back input character; faster than using ungetc.  */
-static int nextchar = -1;
-#endif
-
-#ifdef HANDLE_GENERIC_PRAGMAS
-static int handle_generic_pragma	PROTO((int));
-#endif /* HANDLE_GENERIC_PRAGMAS */
-static int whitespace_cr		PROTO((int));
-static int skip_white_space		PROTO((int));
-static int skip_white_space_on_line	PROTO((void));
-static char *extend_token_buffer	PROTO((const char *));
-static int readescape			PROTO((int *));
-static void parse_float			PROTO((PTR));
-
-/* Do not insert generated code into the source, instead, include it.
-   This allows us to build gcc automatically even for targets that
-   need to add or modify the reserved keyword lists.  */
-#include "c-gperf.h"
-
-/* Return something to represent absolute declarators containing a *.
-   TARGET is the absolute declarator that the * contains.
-   TYPE_QUALS is a list of modifiers such as const or volatile
-   to apply to the pointer type, represented as identifiers.
-
-   We return an INDIRECT_REF whose "contents" are TARGET
-   and whose type is the modifier list.  */
-
-tree
-make_pointer_declarator (type_quals, target)
-     tree type_quals, target;
-{
-  return build1 (INDIRECT_REF, type_quals, target);
-}
+int indent_level;        /* Number of { minus number of }.  */
+int pending_lang_change; /* If we need to switch languages - C++ only.  */
+int c_header_level;	 /* Depth in C headers - C++ only.  */
+
+/* Nonzero tells yylex to ignore \ in string constants.  */
+static int ignore_escape_flag;
+
+static void parse_float		PARAMS ((PTR));
+static tree lex_number		PARAMS ((const char *, unsigned int));
+static tree lex_string		PARAMS ((const char *, unsigned int, int));
+static tree lex_charconst	PARAMS ((const cpp_token *));
+static void update_header_times	PARAMS ((const char *));
+static int dump_one_header	PARAMS ((splay_tree_node, void *));
+static void cb_line_change     PARAMS ((cpp_reader *, const cpp_token *, int));
+static void cb_ident		PARAMS ((cpp_reader *, unsigned int,
+					 const cpp_string *));
+static void cb_file_change    PARAMS ((cpp_reader *, const struct line_map *));
+static void cb_def_pragma	PARAMS ((cpp_reader *, unsigned int));
+static void cb_define		PARAMS ((cpp_reader *, unsigned int,
+					 cpp_hashnode *));
+static void cb_undef		PARAMS ((cpp_reader *, unsigned int,
+					 cpp_hashnode *));
 
-void
-forget_protocol_qualifiers ()
+const char *
+init_c_lex (filename)
+     const char *filename;
 {
-  int i, n = sizeof wordlist / sizeof (struct resword);
-
-  for (i = 0; i < n; i++)
-    if ((int) wordlist[i].rid >= (int) RID_IN
-        && (int) wordlist[i].rid <= (int) RID_ONEWAY)
-      wordlist[i].name = "";
-}
-
-void
-remember_protocol_qualifiers ()
-{
-  int i, n = sizeof wordlist / sizeof (struct resword);
-
-  for (i = 0; i < n; i++)
-    if (wordlist[i].rid == RID_IN)
-      wordlist[i].name = "in";
-    else if (wordlist[i].rid == RID_OUT)
-      wordlist[i].name = "out";
-    else if (wordlist[i].rid == RID_INOUT)
-      wordlist[i].name = "inout";
-    else if (wordlist[i].rid == RID_BYCOPY)
-      wordlist[i].name = "bycopy";
-    else if (wordlist[i].rid == RID_BYREF)
-      wordlist[i].name = "byref";
-    else if (wordlist[i].rid == RID_ONEWAY)
-      wordlist[i].name = "oneway";
-}
-
-char *
-init_parse (filename)
-     char *filename;
-{
-#if !USE_CPPLIB
-  /* Open input file.  */
-  if (filename == 0 || !strcmp (filename, "-"))
+  struct cpp_callbacks *cb;
+  struct c_fileinfo *toplevel;
+
+  /* Set up filename timing.  Must happen before cpp_read_main_file.  */
+  file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
+				   0,
+				   (splay_tree_delete_value_fn)free);
+  toplevel = get_fileinfo ("<top level>");
+  if (flag_detailed_statistics)
     {
-      finput = stdin;
-      filename = "stdin";
+      header_time = 0;
+      body_time = get_run_time ();
+      toplevel->time = body_time;
     }
-  else
-    finput = fopen (filename, "r");
-  if (finput == 0)
-    pfatal_with_name (filename);
-
-#ifdef IO_BUFFER_SIZE
-  setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
-#endif
-#else /* !USE_CPPLIB */
-  parse_in.show_column = 1;
-  if (! cpp_start_read (&parse_in, filename))
-    abort ();
-
-  if (filename == 0 || !strcmp (filename, "-"))
-    filename = "stdin";
-
-  /* cpp_start_read always puts at least one line directive into the
-     token buffer.  We must arrange to read it out here. */
-  yy_cur = parse_in.token_buffer;
-  yy_lim = CPP_PWRITTEN (&parse_in);
-#endif
-
-  init_lex ();
-
-  return filename;
-}
-
-void
-finish_parse ()
-{
-#if USE_CPPLIB
-  cpp_finish (&parse_in);
-#else
-  fclose (finput);
-#endif
-}
-
-void
-init_lex ()
-{
-  /* Make identifier nodes long enough for the language-specific slots.  */
-  set_identifier_size (sizeof (struct lang_identifier));
-
-  /* Start it at 0, because check_newline is called at the very beginning
-     and will increment it to 1.  */
-  lineno = 0;
-
+  
 #ifdef MULTIBYTE_CHARS
   /* Change to the native locale for multibyte conversions.  */
   setlocale (LC_CTYPE, "");
-  literal_codeset = getenv ("LANG");
+  GET_ENVIRONMENT (literal_codeset, "LANG");
 #endif
 
-  maxtoken = 40;
-  token_buffer = (char *) xmalloc (maxtoken + 2);
-
-  ridpointers[(int) RID_INT] = get_identifier ("int");
-  ridpointers[(int) RID_CHAR] = get_identifier ("char");
-  ridpointers[(int) RID_VOID] = get_identifier ("void");
-  ridpointers[(int) RID_FLOAT] = get_identifier ("float");
-  ridpointers[(int) RID_DOUBLE] = get_identifier ("double");
-  ridpointers[(int) RID_SHORT] = get_identifier ("short");
-  ridpointers[(int) RID_LONG] = get_identifier ("long");
-  ridpointers[(int) RID_UNSIGNED] = get_identifier ("unsigned");
-  ridpointers[(int) RID_SIGNED] = get_identifier ("signed");
-  ridpointers[(int) RID_INLINE] = get_identifier ("inline");
-  ridpointers[(int) RID_CONST] = get_identifier ("const");
-  ridpointers[(int) RID_RESTRICT] = get_identifier ("restrict");
-  ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");
-  ridpointers[(int) RID_AUTO] = get_identifier ("auto");
-  ridpointers[(int) RID_STATIC] = get_identifier ("static");
-  ridpointers[(int) RID_EXTERN] = get_identifier ("extern");
-  ridpointers[(int) RID_TYPEDEF] = get_identifier ("typedef");
-  ridpointers[(int) RID_REGISTER] = get_identifier ("register");
-  ridpointers[(int) RID_ITERATOR] = get_identifier ("iterator");
-  ridpointers[(int) RID_COMPLEX] = get_identifier ("complex");
-  ridpointers[(int) RID_ID] = get_identifier ("id");
-  ridpointers[(int) RID_IN] = get_identifier ("in");
-  ridpointers[(int) RID_OUT] = get_identifier ("out");
-  ridpointers[(int) RID_INOUT] = get_identifier ("inout");
-  ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");
-  ridpointers[(int) RID_BYREF] = get_identifier ("byref");
-  ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");
-  forget_protocol_qualifiers();
-
-  /* Some options inhibit certain reserved words.
-     Clear those words out of the hash table so they won't be recognized.  */
-#define UNSET_RESERVED_WORD(STRING) \
-  do { struct resword *s = is_reserved_word (STRING, sizeof (STRING) - 1); \
-       if (s) s->name = ""; } while (0)
-
-  if (! doing_objc_thang)
-    UNSET_RESERVED_WORD ("id");
-
-  if (flag_traditional)
-    {
-      UNSET_RESERVED_WORD ("const");
-      UNSET_RESERVED_WORD ("restrict");
-      UNSET_RESERVED_WORD ("volatile");
-      UNSET_RESERVED_WORD ("typeof");
-      UNSET_RESERVED_WORD ("signed");
-      UNSET_RESERVED_WORD ("inline");
-      UNSET_RESERVED_WORD ("iterator");
-      UNSET_RESERVED_WORD ("complex");
-    }
-  else if (!flag_isoc9x)
-    UNSET_RESERVED_WORD ("restrict");
+  cb = cpp_get_callbacks (parse_in);
+
+  cb->line_change = cb_line_change;
+  cb->ident = cb_ident;
+  cb->file_change = cb_file_change;
+  cb->def_pragma = cb_def_pragma;
 
-  if (flag_no_asm)
+  /* Set the debug callbacks if we can use them.  */
+  if (debug_info_level == DINFO_LEVEL_VERBOSE
+      && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
+          || write_symbols == VMS_AND_DWARF2_DEBUG))
     {
-      UNSET_RESERVED_WORD ("asm");
-      UNSET_RESERVED_WORD ("typeof");
-      UNSET_RESERVED_WORD ("inline");
-      UNSET_RESERVED_WORD ("iterator");
-      UNSET_RESERVED_WORD ("complex");
+      cb->define = cb_define;
+      cb->undef = cb_undef;
     }
-}
-
-void
-reinit_parse_for_function ()
-{
-}
-
-/* Function used when yydebug is set, to print a token in more detail.  */
 
-void
-yyprint (file, yychar, yylval)
-     FILE *file;
-     int yychar;
-     YYSTYPE yylval;
-{
-  tree t;
-  switch (yychar)
-    {
-    case IDENTIFIER:
-    case TYPENAME:
-    case OBJECTNAME:
-      t = yylval.ttype;
-      if (IDENTIFIER_POINTER (t))
-	fprintf (file, " `%s'", IDENTIFIER_POINTER (t));
-      break;
+  /* Start lineno at 0.  */
+  lineno = 0;
 
-    case CONSTANT:
-      t = yylval.ttype;
-      if (TREE_CODE (t) == INTEGER_CST)
-	fprintf (file,
-#if HOST_BITS_PER_WIDE_INT == 64
-#if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_INT
-		 " 0x%x%016x",
-#else
-#if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONG
-		 " 0x%lx%016lx",
-#else
-		 " 0x%llx%016llx",
-#endif
-#endif
-#else
-#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
-		 " 0x%lx%08lx",
-#else
-		 " 0x%x%08x",
-#endif
-#endif
-		 TREE_INT_CST_HIGH (t), TREE_INT_CST_LOW (t));
-      break;
-    }
-}
-
-/* Iff C is a carriage return, warn about it - if appropriate -
-   and return nonzero.  */
-static int
-whitespace_cr (c)
-     int c;
-{
-  static int newline_warning = 0;
+  if (filename == NULL || !strcmp (filename, "-"))
+    filename = "";
 
-  if (c == '\r')
-    {
-      /* ANSI C says the effects of a carriage return in a source file
-	 are undefined.  */
-      if (pedantic && !newline_warning)
-	{
-	  warning ("carriage return in source file");
-	  warning ("(we only warn about the first carriage return)");
-	  newline_warning = 1;
-	}
-      return 1;
-    }
-  return 0;
+  return cpp_read_main_file (parse_in, filename, ident_hash);
 }
 
-/* If C is not whitespace, return C.
-   Otherwise skip whitespace and return first nonwhite char read.  */
+/* A thin wrapper around the real parser that initializes the 
+   integrated preprocessor after debug output has been initialized.
+   Also, make sure the start_source_file debug hook gets called for
+   the primary source file.  */
 
-static int
-skip_white_space (c)
-     register int c;
+int
+yyparse()
 {
-  for (;;)
-    {
-      switch (c)
-	{
-	  /* We don't recognize comments here, because
-	     cpp output can include / and * consecutively as operators.
-	     Also, there's no need, since cpp removes all comments.  */
-
-	case '\n':
-	  c = check_newline ();
-	  break;
-
-	case ' ':
-	case '\t':
-	case '\f':
-	case '\v':
-	case '\b':
-	  c = GETC();
-	  break;
-
-	case '\r':
-	  whitespace_cr (c);
-	  c = GETC();
-	  break;
-
-	case '\\':
-	  c = GETC();
-	  if (c == '\n')
-	    lineno++;
-	  else
-	    error ("stray '\\' in program");
-	  c = GETC();
-	  break;
+  (*debug_hooks->start_source_file) (lineno, input_filename);
+  cpp_finish_options (parse_in);
 
-	default:
-	  return (c);
-	}
-    }
+  return yyparse_1();
 }
 
-/* Skips all of the white space at the current location in the input file.
-   Must use and reset nextchar if it has the next character.  */
-
-void
-position_after_white_space ()
+struct c_fileinfo *
+get_fileinfo (name)
+     const char *name;
 {
-  register int c;
-
-#if !USE_CPPLIB
-  if (nextchar != -1)
-    c = nextchar, nextchar = -1;
-  else
-#endif
-    c = GETC();
-
-  UNGETC (skip_white_space (c));
+  splay_tree_node n;
+  struct c_fileinfo *fi;
+
+  n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
+  if (n)
+    return (struct c_fileinfo *) n->value;
+
+  fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
+  fi->time = 0;
+  fi->interface_only = 0;
+  fi->interface_unknown = 1;
+  splay_tree_insert (file_info_tree, (splay_tree_key) name,
+		     (splay_tree_value) fi);
+  return fi;
 }
 
-/* Like skip_white_space, but don't advance beyond the end of line.
-   Moreover, we don't get passed a character to start with.  */
-static int
-skip_white_space_on_line ()
+static void
+update_header_times (name)
+     const char *name;
 {
-  register int c;
-
-  while (1)
+  /* Changing files again.  This means currently collected time
+     is charged against header time, and body time starts back at 0.  */
+  if (flag_detailed_statistics)
     {
-      c = GETC();
-      switch (c)
-	{
-	case '\n':
-	default:
-	  break;
-
-	case ' ':
-	case '\t':
-	case '\f':
-	case '\v':
-	case '\b':
-	  continue;
-
-	case '\r':
-	  whitespace_cr (c);
-	  continue;
-	}
-      break;
+      int this_time = get_run_time ();
+      struct c_fileinfo *file = get_fileinfo (name);
+      header_time += this_time - body_time;
+      file->time += this_time - body_time;
+      body_time = this_time;
     }
-  return c;
 }
 
-/* Make the token buffer longer, preserving the data in it.
-   P should point to just beyond the last valid character in the old buffer.
-   The value we return is a pointer to the new buffer
-   at a place corresponding to P.  */
-
-static char *
-extend_token_buffer (p)
-     const char *p;
-{
-  int offset = p - token_buffer;
-
-  maxtoken = maxtoken * 2 + 10;
-  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
-
-  return token_buffer + offset;
-}
-
-#if defined HANDLE_PRAGMA
-/* Local versions of these macros, that can be passed as function pointers.  */
 static int
-pragma_getc ()
+dump_one_header (n, dummy)
+     splay_tree_node n;
+     void *dummy ATTRIBUTE_UNUSED;
 {
-  return GETC();
+  print_time ((const char *) n->key,
+	      ((struct c_fileinfo *) n->value)->time);
+  return 0;
 }
 
-static void
-pragma_ungetc (arg)
-     int arg;
+void
+dump_time_statistics ()
 {
-  UNGETC (arg);
+  struct c_fileinfo *file = get_fileinfo (input_filename);
+  int this_time = get_run_time ();
+  file->time += this_time - body_time;
+
+  fprintf (stderr, "\n******\n");
+  print_time ("header files (total)", header_time);
+  print_time ("main file (total)", this_time - body_time);
+  fprintf (stderr, "ratio = %g : 1\n",
+	   (double)header_time / (double)(this_time - body_time));
+  fprintf (stderr, "\n******\n");
+
+  splay_tree_foreach (file_info_tree, dump_one_header, 0);
 }
-#endif
 
-/* At the beginning of a line, increment the line number
-   and process any #-directive on this line.
-   If the line is a #-directive, read the entire line and return a newline.
-   Otherwise, return the line's first non-whitespace character.  */
+/* Not yet handled: #pragma, #define, #undef.
+   No need to deal with linemarkers under normal conditions.  */
 
-int
-check_newline ()
+static void
+cb_ident (pfile, line, str)
+     cpp_reader *pfile ATTRIBUTE_UNUSED;
+     unsigned int line ATTRIBUTE_UNUSED;
+     const cpp_string *str ATTRIBUTE_UNUSED;
 {
-  register int c;
-  register int token;
-
-  lineno++;
-
-  /* Read first nonwhite char on the line.  */
-
-  c = GETC();
-  while (c == ' ' || c == '\t')
-    c = GETC();
-
-  if (c != '#')
+#ifdef ASM_OUTPUT_IDENT
+  if (! flag_no_ident)
     {
-      /* If not #, return it so caller will use it.  */
-      return c;
+      /* Convert escapes in the string.  */
+      tree value = lex_string ((const char *)str->text, str->len, 0);
+      ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
     }
+#endif
+}
 
-  /* Read first nonwhite char after the `#'.  */
-
-  c = GETC();
-  while (c == ' ' || c == '\t')
-    c = GETC();
+/* Called at the start of every non-empty line.  TOKEN is the first
+   lexed token on the line.  Used for diagnostic line numbers.  */
+static void
+cb_line_change (pfile, token, parsing_args)
+     cpp_reader *pfile ATTRIBUTE_UNUSED;
+     const cpp_token *token;
+     int parsing_args ATTRIBUTE_UNUSED;
+{
+  src_lineno = SOURCE_LINE (map, token->line);
+}
 
-  /* If a letter follows, then if the word here is `line', skip
-     it and ignore it; otherwise, ignore the line, with an error
-     if the word isn't `pragma', `ident', `define', or `undef'.  */
+static void
+cb_file_change (pfile, new_map)
+     cpp_reader *pfile ATTRIBUTE_UNUSED;
+     const struct line_map *new_map;
+{
+  unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
 
-  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+  if (new_map->reason == LC_ENTER)
     {
-      if (c == 'p')
-	{
-	  if (GETC() == 'r'
-	      && GETC() == 'a'
-	      && GETC() == 'g'
-	      && GETC() == 'm'
-	      && GETC() == 'a'
-	      && ((c = GETC()) == ' ' || c == '\t' || c == '\n'
-		   || whitespace_cr (c) ))
-	    {
-	      while (c == ' ' || c == '\t' || whitespace_cr (c))
-		c = GETC ();
-	      if (c == '\n')
-		return c;
-
-#if defined HANDLE_PRAGMA || defined HANDLE_GENERIC_PRAGMAS
-	      UNGETC (c);
-	      token = yylex ();
-	      if (token != IDENTIFIER)
-		goto skipline;
-#endif /* HANDLE_PRAGMA || HANDLE_GENERIC_PRAGMAS */
-
-#ifdef HANDLE_PRAGMA
-	      /* We invoke HANDLE_PRAGMA before HANDLE_GENERIC_PRAGMAS (if
-		 both are defined), in order to give the back end a chance to
-		 override the interpretation of generic style pragmas.  */
-#if !USE_CPPLIB
-	      if (nextchar >= 0)
-		{
-		  c = nextchar, nextchar = -1;
-		  UNGETC (c);
-		}
-#endif /* !USE_CPPLIB */
-
-	      if (TREE_CODE (yylval.ttype) != IDENTIFIER_NODE)
-		goto skipline;
-
-	      if (HANDLE_PRAGMA (pragma_getc, pragma_ungetc,
-				 IDENTIFIER_POINTER (yylval.ttype)))
-		return GETC ();
-#endif /* HANDLE_PRAGMA */
-
-#ifdef HANDLE_GENERIC_PRAGMAS
-	      if (handle_generic_pragma (token))
-		return GETC ();
-#endif /* HANDLE_GENERIC_PRAGMAS */
-
-	      /* Issue a warning message if we have been asked to do so.
-		 Ignoring unknown pragmas in system header file unless
-		 an explcit -Wunknown-pragmas has been given. */
-	      if (warn_unknown_pragmas > 1
-		  || (warn_unknown_pragmas && ! in_system_header))
-		warning ("ignoring pragma: %s", token_buffer);
-
-	      goto skipline;
-	    }
-	}
-
-      else if (c == 'd')
-	{
-	  if (GETC() == 'e'
-	      && GETC() == 'f'
-	      && GETC() == 'i'
-	      && GETC() == 'n'
-	      && GETC() == 'e'
-	      && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
-	    {
-	      if (c != '\n')
-		debug_define (lineno, GET_DIRECTIVE_LINE ());
-	      goto skipline;
-	    }
-	}
-      else if (c == 'u')
+      /* Don't stack the main buffer on the input stack;
+	 we already did in compile_file.  */
+      if (map == NULL)
+	main_input_filename = new_map->to_file;
+      else
 	{
-	  if (GETC() == 'n'
-	      && GETC() == 'd'
-	      && GETC() == 'e'
-	      && GETC() == 'f'
-	      && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
+	  lineno = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
+	  push_srcloc (new_map->to_file, 1);
+	  input_file_stack->indent_level = indent_level;
+	  (*debug_hooks->start_source_file) (lineno, new_map->to_file);
+#ifndef NO_IMPLICIT_EXTERN_C
+	  if (c_header_level)
+	    ++c_header_level;
+	  else if (new_map->sysp == 2)
 	    {
-	      if (c != '\n')
-		debug_undef (lineno, GET_DIRECTIVE_LINE ());
-	      goto skipline;
+	      c_header_level = 1;
+	      ++pending_lang_change;
 	    }
-	}
-      else if (c == 'l')
-	{
-	  if (GETC() == 'i'
-	      && GETC() == 'n'
-	      && GETC() == 'e'
-	      && ((c = GETC()) == ' ' || c == '\t'))
-	    goto linenum;
-	}
-      else if (c == 'i')
-	{
-	  if (GETC() == 'd'
-	      && GETC() == 'e'
-	      && GETC() == 'n'
-	      && GETC() == 't'
-	      && ((c = GETC()) == ' ' || c == '\t'))
-	    {
-	      /* #ident.  The pedantic warning is now in cccp.c.  */
-
-	      /* Here we have just seen `#ident '.
-		 A string constant should follow.  */
-
-	      c = skip_white_space_on_line ();
-
-	      /* If no argument, ignore the line.  */
-	      if (c == '\n')
-		return c;
-
-	      UNGETC (c);
-	      token = yylex ();
-	      if (token != STRING
-		  || TREE_CODE (yylval.ttype) != STRING_CST)
-		{
-		  error ("invalid #ident");
-		  goto skipline;
-		}
-
-	      if (!flag_no_ident)
-		{
-#ifdef ASM_OUTPUT_IDENT
-		  ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (yylval.ttype));
 #endif
-		}
-
-	      /* Skip the rest of this line.  */
-	      goto skipline;
-	    }
 	}
-
-      error ("undefined or invalid # directive");
-      goto skipline;
     }
-
-linenum:
-  /* Here we have either `#line' or `# <nonletter>'.
-     In either case, it should be a line number; a digit should follow.  */
-
-  /* Can't use skip_white_space here, but must handle all whitespace
-     that is not '\n', lest we get a recursion for '\r' '\n' when
-     calling yylex.  */
-  UNGETC (c);
-  c = skip_white_space_on_line ();
-
-  /* If the # is the only nonwhite char on the line,
-     just ignore it.  Check the new newline.  */
-  if (c == '\n')
-    return c;
-
-  /* Something follows the #; read a token.  */
-
-  UNGETC (c);
-  token = yylex ();
-
-  if (token == CONSTANT
-      && TREE_CODE (yylval.ttype) == INTEGER_CST)
+  else if (new_map->reason == LC_LEAVE)
     {
-      int old_lineno = lineno;
-      int used_up = 0;
-      /* subtract one, because it is the following line that
-	 gets the specified number */
-
-      int l = TREE_INT_CST_LOW (yylval.ttype) - 1;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = skip_white_space_on_line ();
-      if (c == '\n')
-	{
-	  /* No more: store the line number and check following line.  */
-	  lineno = l;
-	  return c;
-	}
-      UNGETC (c);
-
-      /* More follows: it must be a string constant (filename).  */
-
-      /* Read the string constant.  */
-      token = yylex ();
-
-      if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
-	{
-	  error ("invalid #line");
-	  goto skipline;
-	}
-
-      input_filename
-	= (char *) permalloc (TREE_STRING_LENGTH (yylval.ttype) + 1);
-      strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
-      lineno = l;
-
-      /* Each change of file name
-	 reinitializes whether we are now in a system header.  */
-      in_system_header = 0;
-
-      if (main_input_filename == 0)
-	main_input_filename = input_filename;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = skip_white_space_on_line ();
-      if (c == '\n')
+#ifndef NO_IMPLICIT_EXTERN_C
+      if (c_header_level && --c_header_level == 0)
 	{
-	  /* Update the name in the top element of input_file_stack.  */
-	  if (input_file_stack)
-	    input_file_stack->name = input_filename;
-
-	  return c;
+	  if (new_map->sysp == 2)
+	    warning ("badly nested C headers from preprocessor");
+	  --pending_lang_change;
 	}
-      UNGETC (c);
-
-      token = yylex ();
-      used_up = 0;
-
-      /* `1' after file name means entering new file.
-	 `2' after file name means just left a file.  */
-
-      if (token == CONSTANT
-	  && TREE_CODE (yylval.ttype) == INTEGER_CST)
-	{
-	  if (TREE_INT_CST_LOW (yylval.ttype) == 1)
-	    {
-	      /* Pushing to a new file.  */
-	      struct file_stack *p
-		= (struct file_stack *) xmalloc (sizeof (struct file_stack));
-	      input_file_stack->line = old_lineno;
-	      p->next = input_file_stack;
-	      p->name = input_filename;
-	      p->indent_level = indent_level;
-	      input_file_stack = p;
-	      input_file_stack_tick++;
-	      debug_start_source_file (input_filename);
-	      used_up = 1;
-	    }
-	  else if (TREE_INT_CST_LOW (yylval.ttype) == 2)
-	    {
-	      /* Popping out of a file.  */
-	      if (input_file_stack->next)
-		{
-		  struct file_stack *p = input_file_stack;
-		  if (indent_level != p->indent_level)
-		    {
-		      warning_with_file_and_line
-			(p->name, old_lineno,
-			 "This file contains more `%c's than `%c's.",
-			 indent_level > p->indent_level ? '{' : '}',
-			 indent_level > p->indent_level ? '}' : '{');
-		    }
-		  input_file_stack = p->next;
-		  free (p);
-		  input_file_stack_tick++;
-		  debug_end_source_file (input_file_stack->line);
-		}
-	      else
-		error ("#-lines for entering and leaving files don't match");
-
-	      used_up = 1;
-	    }
-	}
-
-      /* Now that we've pushed or popped the input stack,
-	 update the name in the top element.  */
-      if (input_file_stack)
-	input_file_stack->name = input_filename;
-
-      /* If we have handled a `1' or a `2',
-	 see if there is another number to read.  */
-      if (used_up)
-	{
-	  /* Is this the last nonwhite stuff on the line?  */
-	  c = skip_white_space_on_line ();
-	  if (c == '\n')
-	    return c;
-	  UNGETC (c);
-
-	  token = yylex ();
-	  used_up = 0;
-	}
-
-      /* `3' after file name means this is a system header file.  */
-
-      if (token == CONSTANT
-	  && TREE_CODE (yylval.ttype) == INTEGER_CST
-	  && TREE_INT_CST_LOW (yylval.ttype) == 3)
-	in_system_header = 1, used_up = 1;
-
-      if (used_up)
+#endif
+#if 0
+      if (indent_level != input_file_stack->indent_level)
 	{
-	  /* Is this the last nonwhite stuff on the line?  */
-	  c = skip_white_space_on_line ();
-	  if (c == '\n')
-	    return c;
-	  UNGETC (c);
+	  warning_with_file_and_line
+	    (input_filename, lineno,
+	     "this file contains more '%c's than '%c's",
+	     indent_level > input_file_stack->indent_level ? '{' : '}',
+	     indent_level > input_file_stack->indent_level ? '}' : '{');
 	}
-
-      warning ("unrecognized text at end of #line");
-    }
-  else
-    error ("invalid #-line");
-
-  /* skip the rest of this line.  */
- skipline:
-#if !USE_CPPLIB
-  if (c != '\n' && c != EOF && nextchar >= 0)
-    c = nextchar, nextchar = -1;
 #endif
-  while (c != '\n' && c != EOF)
-    c = GETC();
-  return c;
-}
-
-#ifdef HANDLE_GENERIC_PRAGMAS
+      pop_srcloc ();
+      
+      (*debug_hooks->end_source_file) (to_line);
+    }
 
-/* Handle a #pragma directive.
-   TOKEN is the token we read after `#pragma'.  Processes the entire input
-   line and return non-zero iff the pragma has been successfully parsed.  */
+  update_header_times (new_map->to_file);
+  in_system_header = new_map->sysp != 0;
+  input_filename = new_map->to_file;
+  lineno = to_line;
+  map = new_map;
 
-/* This function has to be in this file, in order to get at
-   the token types.  */
+  /* Hook for C++.  */
+  extract_interface_info ();
+}
 
-static int
-handle_generic_pragma (token)
-     register int token;
+static void
+cb_def_pragma (pfile, line)
+     cpp_reader *pfile;
+     unsigned int line;
 {
-  register int c;
-
-  for (;;)
+  /* Issue a warning message if we have been asked to do so.  Ignore
+     unknown pragmas in system headers unless an explicit
+     -Wunknown-pragmas has been given.  */
+  if (warn_unknown_pragmas > in_system_header)
     {
-      switch (token)
-	{
-	case IDENTIFIER:
-	case TYPENAME:
-	case STRING:
-	case CONSTANT:
-	  handle_pragma_token (token_buffer, yylval.ttype);
-	  break;
-	default:
-	  handle_pragma_token (token_buffer, NULL);
-	}
-#if !USE_CPPLIB
-      if (nextchar >= 0)
-	c = nextchar, nextchar = -1;
+      const unsigned char *space, *name = 0;
+      const cpp_token *s;
+
+      s = cpp_get_token (pfile);
+      space = cpp_token_as_text (pfile, s);
+      s = cpp_get_token (pfile);
+      if (s->type == CPP_NAME)
+	name = cpp_token_as_text (pfile, s);
+
+      lineno = SOURCE_LINE (map, line);
+      if (name)
+	warning ("ignoring #pragma %s %s", space, name);
       else
-#endif
-	c = GETC ();
-
-      while (c == ' ' || c == '\t')
-	c = GETC ();
-      UNGETC (c);
-
-      if (c == '\n' || c == EOF)
-	return handle_pragma_token (NULL, NULL);
-
-      token = yylex ();
+	warning ("ignoring #pragma %s", space);
     }
 }
 
-#endif /* HANDLE_GENERIC_PRAGMAS */
-
-#define ENDFILE -1  /* token that represents end-of-file */
-
-/* Read an escape sequence, returning its equivalent as a character,
-   or store 1 in *ignore_ptr if it is backslash-newline.  */
-
-static int
-readescape (ignore_ptr)
-     int *ignore_ptr;
+/* #define callback for DWARF and DWARF2 debug info.  */
+static void
+cb_define (pfile, line, node)
+     cpp_reader *pfile;
+     unsigned int line;
+     cpp_hashnode *node;
 {
-  register int c = GETC();
-  register int code;
-  register unsigned count;
-  unsigned firstdig = 0;
-  int nonnull;
-
-  switch (c)
-    {
-    case 'x':
-      if (warn_traditional)
-	warning ("the meaning of `\\x' varies with -traditional");
-
-      if (flag_traditional)
-	return c;
-
-      code = 0;
-      count = 0;
-      nonnull = 0;
-      while (1)
-	{
-	  c = GETC();
-	  if (!(c >= 'a' && c <= 'f')
-	      && !(c >= 'A' && c <= 'F')
-	      && !(c >= '0' && c <= '9'))
-	    {
-	      UNGETC (c);
-	      break;
-	    }
-	  code *= 16;
-	  if (c >= 'a' && c <= 'f')
-	    code += c - 'a' + 10;
-	  if (c >= 'A' && c <= 'F')
-	    code += c - 'A' + 10;
-	  if (c >= '0' && c <= '9')
-	    code += c - '0';
-	  if (code != 0 || count != 0)
-	    {
-	      if (count == 0)
-		firstdig = code;
-	      count++;
-	    }
-	  nonnull = 1;
-	}
-      if (! nonnull)
-	error ("\\x used with no following hex digits");
-      else if (count == 0)
-	/* Digits are all 0's.  Ok.  */
-	;
-      else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
-	       || (count > 1
-		   && (((unsigned)1 << (TYPE_PRECISION (integer_type_node) - (count - 1) * 4))
-		       <= firstdig)))
-	pedwarn ("hex escape out of range");
-      return code;
-
-    case '0':  case '1':  case '2':  case '3':  case '4':
-    case '5':  case '6':  case '7':
-      code = 0;
-      count = 0;
-      while ((c <= '7') && (c >= '0') && (count++ < 3))
-	{
-	  code = (code * 8) + (c - '0');
-	  c = GETC();
-	}
-      UNGETC (c);
-      return code;
-
-    case '\\': case '\'': case '"':
-      return c;
-
-    case '\n':
-      lineno++;
-      *ignore_ptr = 1;
-      return 0;
+  (*debug_hooks->define) (SOURCE_LINE (map, line),
+			  (const char *) cpp_macro_definition (pfile, node));
+}
 
-    case 'n':
-      return TARGET_NEWLINE;
+/* #undef callback for DWARF and DWARF2 debug info.  */
+static void
+cb_undef (pfile, line, node)
+     cpp_reader *pfile ATTRIBUTE_UNUSED;
+     unsigned int line;
+     cpp_hashnode *node;
+{
+  (*debug_hooks->undef) (SOURCE_LINE (map, line),
+			 (const char *) NODE_NAME (node));
+}
 
-    case 't':
-      return TARGET_TAB;
+#if 0 /* not yet */
+/* Returns nonzero if C is a universal-character-name.  Give an error if it
+   is not one which may appear in an identifier, as per [extendid].
 
-    case 'r':
-      return TARGET_CR;
+   Note that extended character support in identifiers has not yet been
+   implemented.  It is my personal opinion that this is not a desirable
+   feature.  Portable code cannot count on support for more than the basic
+   identifier character set.  */
 
-    case 'f':
-      return TARGET_FF;
+static inline int
+is_extended_char (c)
+     int c;
+{
+#ifdef TARGET_EBCDIC
+  return 0;
+#else
+  /* ASCII.  */
+  if (c < 0x7f)
+    return 0;
 
-    case 'b':
-      return TARGET_BS;
+  /* None of the valid chars are outside the Basic Multilingual Plane (the
+     low 16 bits).  */
+  if (c > 0xffff)
+    {
+      error ("universal-character-name '\\U%08x' not valid in identifier", c);
+      return 1;
+    }
+  
+  /* Latin */
+  if ((c >= 0x00c0 && c <= 0x00d6)
+      || (c >= 0x00d8 && c <= 0x00f6)
+      || (c >= 0x00f8 && c <= 0x01f5)
+      || (c >= 0x01fa && c <= 0x0217)
+      || (c >= 0x0250 && c <= 0x02a8)
+      || (c >= 0x1e00 && c <= 0x1e9a)
+      || (c >= 0x1ea0 && c <= 0x1ef9))
+    return 1;
+
+  /* Greek */
+  if ((c == 0x0384)
+      || (c >= 0x0388 && c <= 0x038a)
+      || (c == 0x038c)
+      || (c >= 0x038e && c <= 0x03a1)
+      || (c >= 0x03a3 && c <= 0x03ce)
+      || (c >= 0x03d0 && c <= 0x03d6)
+      || (c == 0x03da)
+      || (c == 0x03dc)
+      || (c == 0x03de)
+      || (c == 0x03e0)
+      || (c >= 0x03e2 && c <= 0x03f3)
+      || (c >= 0x1f00 && c <= 0x1f15)
+      || (c >= 0x1f18 && c <= 0x1f1d)
+      || (c >= 0x1f20 && c <= 0x1f45)
+      || (c >= 0x1f48 && c <= 0x1f4d)
+      || (c >= 0x1f50 && c <= 0x1f57)
+      || (c == 0x1f59)
+      || (c == 0x1f5b)
+      || (c == 0x1f5d)
+      || (c >= 0x1f5f && c <= 0x1f7d)
+      || (c >= 0x1f80 && c <= 0x1fb4)
+      || (c >= 0x1fb6 && c <= 0x1fbc)
+      || (c >= 0x1fc2 && c <= 0x1fc4)
+      || (c >= 0x1fc6 && c <= 0x1fcc)
+      || (c >= 0x1fd0 && c <= 0x1fd3)
+      || (c >= 0x1fd6 && c <= 0x1fdb)
+      || (c >= 0x1fe0 && c <= 0x1fec)
+      || (c >= 0x1ff2 && c <= 0x1ff4)
+      || (c >= 0x1ff6 && c <= 0x1ffc))
+    return 1;
+
+  /* Cyrillic */
+  if ((c >= 0x0401 && c <= 0x040d)
+      || (c >= 0x040f && c <= 0x044f)
+      || (c >= 0x0451 && c <= 0x045c)
+      || (c >= 0x045e && c <= 0x0481)
+      || (c >= 0x0490 && c <= 0x04c4)
+      || (c >= 0x04c7 && c <= 0x04c8)
+      || (c >= 0x04cb && c <= 0x04cc)
+      || (c >= 0x04d0 && c <= 0x04eb)
+      || (c >= 0x04ee && c <= 0x04f5)
+      || (c >= 0x04f8 && c <= 0x04f9))
+    return 1;
+
+  /* Armenian */
+  if ((c >= 0x0531 && c <= 0x0556)
+      || (c >= 0x0561 && c <= 0x0587))
+    return 1;
+
+  /* Hebrew */
+  if ((c >= 0x05d0 && c <= 0x05ea)
+      || (c >= 0x05f0 && c <= 0x05f4))
+    return 1;
+
+  /* Arabic */
+  if ((c >= 0x0621 && c <= 0x063a)
+      || (c >= 0x0640 && c <= 0x0652)
+      || (c >= 0x0670 && c <= 0x06b7)
+      || (c >= 0x06ba && c <= 0x06be)
+      || (c >= 0x06c0 && c <= 0x06ce)
+      || (c >= 0x06e5 && c <= 0x06e7))
+    return 1;
+
+  /* Devanagari */
+  if ((c >= 0x0905 && c <= 0x0939)
+      || (c >= 0x0958 && c <= 0x0962))
+    return 1;
+
+  /* Bengali */
+  if ((c >= 0x0985 && c <= 0x098c)
+      || (c >= 0x098f && c <= 0x0990)
+      || (c >= 0x0993 && c <= 0x09a8)
+      || (c >= 0x09aa && c <= 0x09b0)
+      || (c == 0x09b2)
+      || (c >= 0x09b6 && c <= 0x09b9)
+      || (c >= 0x09dc && c <= 0x09dd)
+      || (c >= 0x09df && c <= 0x09e1)
+      || (c >= 0x09f0 && c <= 0x09f1))
+    return 1;
+
+  /* Gurmukhi */
+  if ((c >= 0x0a05 && c <= 0x0a0a)
+      || (c >= 0x0a0f && c <= 0x0a10)
+      || (c >= 0x0a13 && c <= 0x0a28)
+      || (c >= 0x0a2a && c <= 0x0a30)
+      || (c >= 0x0a32 && c <= 0x0a33)
+      || (c >= 0x0a35 && c <= 0x0a36)
+      || (c >= 0x0a38 && c <= 0x0a39)
+      || (c >= 0x0a59 && c <= 0x0a5c)
+      || (c == 0x0a5e))
+    return 1;
+
+  /* Gujarati */
+  if ((c >= 0x0a85 && c <= 0x0a8b)
+      || (c == 0x0a8d)
+      || (c >= 0x0a8f && c <= 0x0a91)
+      || (c >= 0x0a93 && c <= 0x0aa8)
+      || (c >= 0x0aaa && c <= 0x0ab0)
+      || (c >= 0x0ab2 && c <= 0x0ab3)
+      || (c >= 0x0ab5 && c <= 0x0ab9)
+      || (c == 0x0ae0))
+    return 1;
+
+  /* Oriya */
+  if ((c >= 0x0b05 && c <= 0x0b0c)
+      || (c >= 0x0b0f && c <= 0x0b10)
+      || (c >= 0x0b13 && c <= 0x0b28)
+      || (c >= 0x0b2a && c <= 0x0b30)
+      || (c >= 0x0b32 && c <= 0x0b33)
+      || (c >= 0x0b36 && c <= 0x0b39)
+      || (c >= 0x0b5c && c <= 0x0b5d)
+      || (c >= 0x0b5f && c <= 0x0b61))
+    return 1;
+
+  /* Tamil */
+  if ((c >= 0x0b85 && c <= 0x0b8a)
+      || (c >= 0x0b8e && c <= 0x0b90)
+      || (c >= 0x0b92 && c <= 0x0b95)
+      || (c >= 0x0b99 && c <= 0x0b9a)
+      || (c == 0x0b9c)
+      || (c >= 0x0b9e && c <= 0x0b9f)
+      || (c >= 0x0ba3 && c <= 0x0ba4)
+      || (c >= 0x0ba8 && c <= 0x0baa)
+      || (c >= 0x0bae && c <= 0x0bb5)
+      || (c >= 0x0bb7 && c <= 0x0bb9))
+    return 1;
+
+  /* Telugu */
+  if ((c >= 0x0c05 && c <= 0x0c0c)
+      || (c >= 0x0c0e && c <= 0x0c10)
+      || (c >= 0x0c12 && c <= 0x0c28)
+      || (c >= 0x0c2a && c <= 0x0c33)
+      || (c >= 0x0c35 && c <= 0x0c39)
+      || (c >= 0x0c60 && c <= 0x0c61))
+    return 1;
+
+  /* Kannada */
+  if ((c >= 0x0c85 && c <= 0x0c8c)
+      || (c >= 0x0c8e && c <= 0x0c90)
+      || (c >= 0x0c92 && c <= 0x0ca8)
+      || (c >= 0x0caa && c <= 0x0cb3)
+      || (c >= 0x0cb5 && c <= 0x0cb9)
+      || (c >= 0x0ce0 && c <= 0x0ce1))
+    return 1;
+
+  /* Malayalam */
+  if ((c >= 0x0d05 && c <= 0x0d0c)
+      || (c >= 0x0d0e && c <= 0x0d10)
+      || (c >= 0x0d12 && c <= 0x0d28)
+      || (c >= 0x0d2a && c <= 0x0d39)
+      || (c >= 0x0d60 && c <= 0x0d61))
+    return 1;
+
+  /* Thai */
+  if ((c >= 0x0e01 && c <= 0x0e30)
+      || (c >= 0x0e32 && c <= 0x0e33)
+      || (c >= 0x0e40 && c <= 0x0e46)
+      || (c >= 0x0e4f && c <= 0x0e5b))
+    return 1;
+
+  /* Lao */
+  if ((c >= 0x0e81 && c <= 0x0e82)
+      || (c == 0x0e84)
+      || (c == 0x0e87)
+      || (c == 0x0e88)
+      || (c == 0x0e8a)
+      || (c == 0x0e0d)  /* NOTE(review): dup of Thai range; 0x0e8d meant?  */
+      || (c >= 0x0e94 && c <= 0x0e97)
+      || (c >= 0x0e99 && c <= 0x0e9f)
+      || (c >= 0x0ea1 && c <= 0x0ea3)
+      || (c == 0x0ea5)
+      || (c == 0x0ea7)
+      || (c == 0x0eaa)
+      || (c == 0x0eab)
+      || (c >= 0x0ead && c <= 0x0eb0)
+      || (c == 0x0eb2)
+      || (c == 0x0eb3)
+      || (c == 0x0ebd)
+      || (c >= 0x0ec0 && c <= 0x0ec4)
+      || (c == 0x0ec6))
+    return 1;
+
+  /* Georgian */
+  if ((c >= 0x10a0 && c <= 0x10c5)
+      || (c >= 0x10d0 && c <= 0x10f6))
+    return 1;
+
+  /* Hiragana */
+  if ((c >= 0x3041 && c <= 0x3094)
+      || (c >= 0x309b && c <= 0x309e))
+    return 1;
+
+  /* Katakana */
+  if ((c >= 0x30a1 && c <= 0x30fe))
+    return 1;
+
+  /* Bopomofo */
+  if ((c >= 0x3105 && c <= 0x312c))
+    return 1;
+
+  /* Hangul */
+  if ((c >= 0x1100 && c <= 0x1159)
+      || (c >= 0x1161 && c <= 0x11a2)
+      || (c >= 0x11a8 && c <= 0x11f9))
+    return 1;
+
+  /* CJK Unified Ideographs */
+  if ((c >= 0xf900 && c <= 0xfa2d)
+      || (c >= 0xfb1f && c <= 0xfb36)
+      || (c >= 0xfb38 && c <= 0xfb3c)
+      || (c == 0xfb3e)
+      || (c >= 0xfb40 && c <= 0xfb41)
+      || (c >= 0xfb42 && c <= 0xfb44)
+      || (c >= 0xfb46 && c <= 0xfbb1)
+      || (c >= 0xfbd3 && c <= 0xfd3f)
+      || (c >= 0xfd50 && c <= 0xfd8f)
+      || (c >= 0xfd92 && c <= 0xfdc7)
+      || (c >= 0xfdf0 && c <= 0xfdfb)
+      || (c >= 0xfe70 && c <= 0xfe72)
+      || (c == 0xfe74)
+      || (c >= 0xfe76 && c <= 0xfefc)
+      || (c >= 0xff21 && c <= 0xff3a)
+      || (c >= 0xff41 && c <= 0xff5a)
+      || (c >= 0xff66 && c <= 0xffbe)
+      || (c >= 0xffc2 && c <= 0xffc7)
+      || (c >= 0xffca && c <= 0xffcf)
+      || (c >= 0xffd2 && c <= 0xffd7)
+      || (c >= 0xffda && c <= 0xffdc)
+      || (c >= 0x4e00 && c <= 0x9fa5))
+    return 1;
+
+  error ("universal-character-name '\\u%04x' not valid in identifier", c);
+  return 1;
+#endif
+}
 
-    case 'a':
-      if (warn_traditional)
-	warning ("the meaning of `\\a' varies with -traditional");
+/* Add the UTF-8 representation of C to the token_buffer.  */
 
-      if (flag_traditional)
-	return c;
-      return TARGET_BELL;
+static void
+utf8_extend_token (c)
+     int c;
+{
+  int shift, mask;
 
-    case 'v':
-#if 0 /* Vertical tab is present in common usage compilers.  */
-      if (flag_traditional)
-	return c;
-#endif
-      return TARGET_VT;
-
-    case 'e':
-    case 'E':
-      if (pedantic)
-	pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
-      return 033;
-
-    case '?':
-      return c;
-
-      /* `\(', etc, are used at beginning of line to avoid confusing Emacs.  */
-    case '(':
-    case '{':
-    case '[':
-      /* `\%' is used to prevent SCCS from getting confused.  */
-    case '%':
-      if (pedantic)
-	pedwarn ("non-ANSI escape sequence `\\%c'", c);
-      return c;
+  if      (c <= 0x0000007f)
+    {
+      extend_token (c);
+      return;
     }
-  if (c >= 040 && c < 0177)
-    pedwarn ("unknown escape sequence `\\%c'", c);
+  else if (c <= 0x000007ff)
+    shift = 6, mask = 0xc0;
+  else if (c <= 0x0000ffff)
+    shift = 12, mask = 0xe0;
+  else if (c <= 0x001fffff)
+    shift = 18, mask = 0xf0;
+  else if (c <= 0x03ffffff)
+    shift = 24, mask = 0xf8;
   else
-    pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
-  return c;
-}
-
-void
-yyerror (msgid)
-     const char *msgid;
-{
-  const char *string = _(msgid);
-
-  /* We can't print string and character constants well
-     because the token_buffer contains the result of processing escapes.  */
-  if (end_of_file)
-    error ("%s at end of input", string);
-  else if (token_buffer[0] == 0)
-    error ("%s at null character", string);
-  else if (token_buffer[0] == '"')
-    error ("%s before string constant", string);
-  else if (token_buffer[0] == '\'')
-    error ("%s before character constant", string);
-  else if (token_buffer[0] < 040 || (unsigned char) token_buffer[0] >= 0177)
-    error ("%s before character 0%o", string, (unsigned char) token_buffer[0]);
-  else
-    error ("%s before `%s'", string, token_buffer);
+    shift = 30, mask = 0xfc;
+  /* Lead byte first, then one continuation byte per remaining 6 bits of C.  */
+  extend_token (mask | (c >> shift));
+  do
+    {
+      shift -= 6;
+      extend_token (0x80 | ((c >> shift) & 0x3f));  /* 10xxxxxx */
+    }
+  while (shift);
 }
+#endif
 
 #if 0
-
 struct try_type
 {
-  tree *node_var;
-  char unsigned_flag;
-  char long_flag;
-  char long_long_flag;
+  tree *const node_var;
+  const char unsigned_flag;
+  const char long_flag;
+  const char long_long_flag;
 };
 
 struct try_type type_sequence[] =
@@ -1093,15 +702,14 @@ struct try_type type_sequence[] =
 struct pf_args
 {
   /* Input */
+  const char *str;
+  int fflag;
+  int lflag;
   int base;
-  char * p;
-  /* I/O */
-  int c;
-  int imag;
-  tree type;
-  int conversion_errno;
   /* Output */
+  int conversion_errno;
   REAL_VALUE_TYPE value;
+  tree type;
 };
  
 static void
@@ -1109,1242 +717,718 @@ parse_float (data)
   PTR data;
 {
   struct pf_args * args = (struct pf_args *) data;
-  int fflag = 0, lflag = 0;
-  /* Copy token_buffer now, while it has just the number
-     and not the suffixes; once we add `f' or `i',
-     REAL_VALUE_ATOF may not work any more.  */
-  char *copy = (char *) alloca (args->p - token_buffer + 1);
-  bcopy (token_buffer, copy, args->p - token_buffer + 1);
-
-  while (1)
-    {
-      int lose = 0;
-
-      /* Read the suffixes to choose a data type.  */
-      switch (args->c)
-	{
-	case 'f': case 'F':
-	  if (fflag)
-	    error ("more than one `f' in numeric constant");
-	  fflag = 1;
-	  break;
-
-	case 'l': case 'L':
-	  if (lflag)
-	    error ("more than one `l' in numeric constant");
-	  lflag = 1;
-	  break;
-
-	case 'i': case 'I':
-	  if (args->imag)
-	    error ("more than one `i' or `j' in numeric constant");
-	  else if (pedantic)
-	    pedwarn ("ANSI C forbids imaginary numeric constants");
-	  args->imag = 1;
-	  break;
-
-	default:
-	  lose = 1;
-	}
+  const char *typename;
 
-      if (lose)
-	break;
-
-      if (args->p >= token_buffer + maxtoken - 3)
-	args->p = extend_token_buffer (args->p);
-      *(args->p++) = args->c;
-      *(args->p) = 0;
-      args->c = GETC();
-    }
+  args->conversion_errno = 0;
+  args->type = double_type_node;
+  typename = "double";
 
   /* The second argument, machine_mode, of REAL_VALUE_ATOF
      tells the desired precision of the binary result
      of decimal-to-binary conversion.  */
 
-  if (fflag)
+  if (args->fflag)
     {
-      if (lflag)
-	error ("both `f' and `l' in floating constant");
+      if (args->lflag)
+	error ("both 'f' and 'l' suffixes on floating constant");
 
       args->type = float_type_node;
-      errno = 0;
-      if (args->base == 16)
-	args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
-      else
-	args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
-      args->conversion_errno = errno;
-      /* A diagnostic is required here by some ANSI C testsuites.
-	 This is not pedwarn, because some people don't want
-	 an error for this.  */
-      if (REAL_VALUE_ISINF (args->value) && pedantic)
-	warning ("floating point number exceeds range of `float'");
+      typename = "float";
     }
-  else if (lflag)
+  else if (args->lflag)
     {
       args->type = long_double_type_node;
-      errno = 0;
-      if (args->base == 16)
-	args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
-      else
-	args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
-      args->conversion_errno = errno;
-      if (REAL_VALUE_ISINF (args->value) && pedantic)
-	warning ("floating point number exceeds range of `long double'");
+      typename = "long double";
     }
-  else
+  else if (flag_single_precision_constant)
     {
-      errno = 0;
-      if (args->base == 16)
-	args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
-      else
-	args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
-      args->conversion_errno = errno;
-      if (REAL_VALUE_ISINF (args->value) && pedantic)
-	warning ("floating point number exceeds range of `double'");
+      args->type = float_type_node;
+      typename = "float";
     }
+
+  errno = 0;
+  if (args->base == 16)
+    args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
+  else
+    args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
+
+  args->conversion_errno = errno;
+  /* A diagnostic is required here by some ISO C testsuites.
+     This is not pedwarn, because some people don't want
+     an error for this.  */
+  if (REAL_VALUE_ISINF (args->value) && pedantic)
+    warning ("floating point number exceeds range of '%s'", typename);
 }
  
 int
-yylex ()
+c_lex (value)
+     tree *value;
 {
-  register int c;
-  register char *p;
-  register int value;
-  int wide_flag = 0;
-  int objc_flag = 0;
-
-#if !USE_CPPLIB
-  if (nextchar >= 0)
-    c = nextchar, nextchar = -1;
-  else
-#endif
-    c = GETC();
-
-  /* Effectively do c = skip_white_space (c)
-     but do it faster in the usual cases.  */
-  while (1)
-    switch (c)
-      {
-      case ' ':
-      case '\t':
-      case '\f':
-      case '\v':
-      case '\b':
-	c = GETC();
-	break;
-
-      case '\r':
-	/* Call skip_white_space so we can warn if appropriate.  */
-
-      case '\n':
-      case '/':
-      case '\\':
-	c = skip_white_space (c);
-      default:
-	goto found_nonwhite;
-      }
- found_nonwhite:
-
-  token_buffer[0] = c;
-  token_buffer[1] = 0;
-
-/*  yylloc.first_line = lineno; */
-
-  switch (c)
+  const cpp_token *tok;
+
+  retry:
+  timevar_push (TV_CPP);
+  do
+    tok = cpp_get_token (parse_in);
+  while (tok->type == CPP_PADDING);
+  timevar_pop (TV_CPP);
+
+  /* The C++ front end does horrible things with the current line
+     number.  To ensure an accurate line number, we must reset it
+     every time we return a token.  */
+  lineno = src_lineno;
+
+  *value = NULL_TREE;
+  switch (tok->type)
     {
-    case EOF:
-      end_of_file = 1;
-      token_buffer[0] = 0;
-      value = ENDFILE;
+    case CPP_OPEN_BRACE:  indent_level++;  break;
+    case CPP_CLOSE_BRACE: indent_level--;  break;
+
+    /* Issue this error here, where we can get at tok->val.c.  */
+    case CPP_OTHER:
+      if (ISGRAPH (tok->val.c))
+	error ("stray '%c' in program", tok->val.c);
+      else
+	error ("stray '\\%o' in program", tok->val.c);
+      goto retry;
+      
+    case CPP_NAME:
+      *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
       break;
 
-    case 'L':
-      /* Capital L may start a wide-string or wide-character constant.  */
-      {
-	register int c = GETC();
-	if (c == '\'')
-	  {
-	    wide_flag = 1;
-	    goto char_constant;
-	  }
-	if (c == '"')
-	  {
-	    wide_flag = 1;
-	    goto string_constant;
-	  }
-	UNGETC (c);
-      }
-      goto letter;
+    case CPP_NUMBER:
+      *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
+      break;
+
+    case CPP_CHAR:
+    case CPP_WCHAR:
+      *value = lex_charconst (tok);
+      break;
+
+    case CPP_STRING:
+    case CPP_WSTRING:
+      *value = lex_string ((const char *)tok->val.str.text,
+			   tok->val.str.len, tok->type == CPP_WSTRING);
+      break;
 
-    case '@':
-      if (!doing_objc_thang)
+      /* These tokens should not be visible outside cpplib.  */
+    case CPP_HEADER_NAME:
+    case CPP_COMMENT:
+    case CPP_MACRO_ARG:
+      abort ();
+
+    default: break;
+    }
+
+  return tok->type;
+}
+
+#define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
+
+static tree
+lex_number (str, len)
+     const char *str;
+     unsigned int len;
+{
+  int base = 10;
+  int count = 0;
+  int largest_digit = 0;
+  int numdigits = 0;
+  int overflow = 0;
+  int c;
+  tree value;
+  const char *p;
+  enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
+  
+  /* We actually store only HOST_BITS_PER_CHAR bits in each part.
+     The code below which fills the parts array assumes that a host
+     int is at least twice as wide as a host char, and that 
+     HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
+     Two HOST_WIDE_INTs is the largest int literal we can store.
+     In order to detect overflow below, the number of parts (TOTAL_PARTS)
+     must be exactly the number of parts needed to hold the bits
+     of two HOST_WIDE_INTs.  */
+#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
+  unsigned int parts[TOTAL_PARTS];
+  
+  /* Optimize for most frequent case.  */
+  if (len == 1)
+    {
+      if (*str == '0')
+	return integer_zero_node;
+      else if (*str == '1')
+	return integer_one_node;
+      else
+	return build_int_2 (*str - '0', 0);
+    }
+
+  for (count = 0; count < TOTAL_PARTS; count++)
+    parts[count] = 0;
+
+  /* len is known to be >1 at this point.  */
+  p = str;
+
+  if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
+    {
+      base = 16;
+      p = str + 2;
+    }
+  /* The ISDIGIT check is so we are not confused by a suffix on 0.  */
+  else if (str[0] == '0' && ISDIGIT (str[1]))
+    {
+      base = 8;
+      p = str + 1;
+    }
+
+  do
+    {
+      c = *p++;
+
+      if (c == '.')
 	{
-	  value = c;
-	  break;
+	  if (floatflag == AFTER_POINT)
+	    ERROR ("too many decimal points in floating constant");
+	  else if (floatflag == AFTER_EXPON)
+	    ERROR ("decimal point in exponent - impossible!");
+	  else
+	    floatflag = AFTER_POINT;
+
+	  if (base == 8)
+	    base = 10;
 	}
+      else if (c == '_')
+	/* Possible future extension: silently ignore _ in numbers,
+	   permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
+	   but somewhat easier to read.  Ada has this?  */
+	ERROR ("underscore in number");
       else
 	{
-	  /* '@' may start a constant string object.  */
-	  register int c = GETC ();
-	  if (c == '"')
+	  int n;
+	  /* It is not a decimal point.
+	     It should be a digit (perhaps a hex digit).  */
+
+	  if (ISDIGIT (c)
+	      || (base == 16 && ISXDIGIT (c)))
 	    {
-	      objc_flag = 1;
-	      goto string_constant;
+	      n = hex_value (c);
 	    }
-	  UNGETC (c);
-	  /* Fall through to treat '@' as the start of an identifier.  */
-	}
-
-    case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
-    case 'F':  case 'G':  case 'H':  case 'I':  case 'J':
-    case 'K':		  case 'M':  case 'N':  case 'O':
-    case 'P':  case 'Q':  case 'R':  case 'S':  case 'T':
-    case 'U':  case 'V':  case 'W':  case 'X':  case 'Y':
-    case 'Z':
-    case 'a':  case 'b':  case 'c':  case 'd':  case 'e':
-    case 'f':  case 'g':  case 'h':  case 'i':  case 'j':
-    case 'k':  case 'l':  case 'm':  case 'n':  case 'o':
-    case 'p':  case 'q':  case 'r':  case 's':  case 't':
-    case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
-    case 'z':
-    case '_':
-    case '$':
-    letter:
-      p = token_buffer;
-      while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
-	{
-	  /* Make sure this char really belongs in an identifier.  */
-	  if (c == '$')
+	  else if (base <= 10 && (c == 'e' || c == 'E'))
 	    {
-	      if (! dollars_in_ident)
-		error ("`$' in identifier");
-	      else if (pedantic)
-		pedwarn ("`$' in identifier");
+	      base = 10;
+	      floatflag = AFTER_EXPON;
+	      break;
+	    }
+	  else if (base == 16 && (c == 'p' || c == 'P'))
+	    {
+	      floatflag = AFTER_EXPON;
+	      break;   /* start of exponent */
+	    }
+	  else
+	    {
+	      p--;
+	      break;  /* start of suffix */
 	    }
 
-	  if (p >= token_buffer + maxtoken)
-	    p = extend_token_buffer (p);
+	  if (n >= largest_digit)
+	    largest_digit = n;
+	  numdigits++;
+
+	  for (count = 0; count < TOTAL_PARTS; count++)
+	    {
+	      parts[count] *= base;
+	      if (count)
+		{
+		  parts[count]
+		    += (parts[count-1] >> HOST_BITS_PER_CHAR);
+		  parts[count-1]
+		    &= (1 << HOST_BITS_PER_CHAR) - 1;
+		}
+	      else
+		parts[0] += n;
+	    }
 
-	  *p++ = c;
-	  c = GETC();
+	  /* If the highest-order part overflows (gets larger than
+	     a host char will hold) then the whole number has 
+	     overflowed.  Record this and truncate the highest-order
+	     part.  */
+	  if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
+	    {
+	      overflow = 1;
+	      parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
+	    }
 	}
+    }
+  while (p < str + len);
 
-      *p = 0;
-#if USE_CPPLIB
-      UNGETC (c);
-#else
-      nextchar = c;
-#endif
+  /* This can happen on input like `int i = 0x;' */
+  if (numdigits == 0)
+    ERROR ("numeric constant with no digits");
+
+  if (largest_digit >= base)
+    ERROR ("numeric constant contains digits beyond the radix");
+
+  if (floatflag != NOT_FLOAT)
+    {
+      tree type;
+      int imag, fflag, lflag, conversion_errno;
+      REAL_VALUE_TYPE real;
+      struct pf_args args;
+      char *copy;
 
-      value = IDENTIFIER;
-      yylval.itype = 0;
+      if (base == 16 && pedantic && !flag_isoc99)
+	pedwarn ("floating constant may not be in radix 16");
 
-      /* Try to recognize a keyword.  Uses minimum-perfect hash function */
+      if (base == 16 && floatflag != AFTER_EXPON)
+	ERROR ("hexadecimal floating constant has no exponent");
 
-      {
-	register struct resword *ptr;
+      /* Skip the explicit exponent, if any; P ends up just past it.  */
+      if ((base == 10 && ((c == 'e') || (c == 'E')))
+	  || (base == 16 && (c == 'p' || c == 'P')))
+	{
+	  if (p < str + len)
+	    c = *p++;
+	  if (p < str + len && (c == '+' || c == '-'))
+	    c = *p++;
+	  /* Exponent is decimal, even if string is a hex float.  */
+	  if (! ISDIGIT (c))
+	    ERROR ("floating constant exponent has no digits");
+	  while (p < str + len && ISDIGIT (c))
+	    c = *p++;
+	  if (! ISDIGIT (c))
+	    p--;
+	}
 
-	if ((ptr = is_reserved_word (token_buffer, p - token_buffer)))
+      /* Copy the float constant now; we don't want any suffixes in the
+	 string passed to parse_float.  */
+      copy = alloca (p - str + 1);
+      memcpy (copy, str, p - str);
+      copy[p - str] = '\0';
+
+      /* Now parse suffixes.  */
+      fflag = lflag = imag = 0;
+      while (p < str + len)
+	switch (*p++)
 	  {
-	    if (ptr->rid)
-	      yylval.ttype = ridpointers[(int) ptr->rid];
-	    value = (int) ptr->token;
-
-	    /* Only return OBJECTNAME if it is a typedef.  */
-	    if (doing_objc_thang && value == OBJECTNAME)
-	      {
-		lastiddecl = lookup_name(yylval.ttype);
-
-		if (lastiddecl == NULL_TREE
-		    || TREE_CODE (lastiddecl) != TYPE_DECL)
-		  value = IDENTIFIER;
-	      }
-
-	    /* Even if we decided to recognize asm, still perhaps warn.  */
-	    if (pedantic
-		&& (value == ASM_KEYWORD || value == TYPEOF
-		    || ptr->rid == RID_INLINE)
-		&& token_buffer[0] != '_')
-	      pedwarn ("ANSI does not permit the keyword `%s'",
-		       token_buffer);
+	  case 'f': case 'F':
+	    if (fflag)
+	      ERROR ("more than one 'f' suffix on floating constant");
+	    else if (warn_traditional && !in_system_header
+		     && ! cpp_sys_macro_p (parse_in))
+	      warning ("traditional C rejects the 'f' suffix");
+
+	    fflag = 1;
+	    break;
+
+	  case 'l': case 'L':
+	    if (lflag)
+	      ERROR ("more than one 'l' suffix on floating constant");
+	    else if (warn_traditional && !in_system_header
+		     && ! cpp_sys_macro_p (parse_in))
+	      warning ("traditional C rejects the 'l' suffix");
+
+	    lflag = 1;
+	    break;
+
+	  case 'i': case 'I':
+	  case 'j': case 'J':
+	    if (imag)
+	      ERROR ("more than one 'i' or 'j' suffix on floating constant");
+	    else if (pedantic)
+	      pedwarn ("ISO C forbids imaginary numeric constants");
+	    imag = 1;
+	    break;
+
+	  default:
+	    ERROR ("invalid suffix on floating constant");
 	  }
-      }
 
-      /* If we did not find a keyword, look for an identifier
-	 (or a typename).  */
+      /* Setup input for parse_float() */
+      args.str = copy;
+      args.fflag = fflag;
+      args.lflag = lflag;
+      args.base = base;
 
-      if (value == IDENTIFIER)
+      /* Convert string to a double, checking for overflow.  */
+      if (do_float_handler (parse_float, (PTR) &args))
 	{
- 	  if (token_buffer[0] == '@')
-	    error("invalid identifier `%s'", token_buffer);
-
-          yylval.ttype = get_identifier (token_buffer);
-	  lastiddecl = lookup_name (yylval.ttype);
-
-	  if (lastiddecl != 0 && TREE_CODE (lastiddecl) == TYPE_DECL)
-	    value = TYPENAME;
-	  /* A user-invisible read-only initialized variable
-	     should be replaced by its value.
-	     We handle only strings since that's the only case used in C.  */
-	  else if (lastiddecl != 0 && TREE_CODE (lastiddecl) == VAR_DECL
-		   && DECL_IGNORED_P (lastiddecl)
-		   && TREE_READONLY (lastiddecl)
-		   && DECL_INITIAL (lastiddecl) != 0
-		   && TREE_CODE (DECL_INITIAL (lastiddecl)) == STRING_CST)
-	    {
-	      tree stringval = DECL_INITIAL (lastiddecl);
+	  /* Receive output from parse_float() */
+	  real = args.value;
+	}
+      else
+	  /* We got an exception from parse_float() */
+	  ERROR ("floating constant out of range");
 
-	      /* Copy the string value so that we won't clobber anything
-		 if we put something in the TREE_CHAIN of this one.  */
-	      yylval.ttype = build_string (TREE_STRING_LENGTH (stringval),
-					   TREE_STRING_POINTER (stringval));
-	      value = STRING;
-	    }
-          else if (doing_objc_thang)
-            {
-	      tree objc_interface_decl = is_class_name (yylval.ttype);
+      /* Receive output from parse_float() */
+      conversion_errno = args.conversion_errno;
+      type = args.type;
+	    
+#ifdef ERANGE
+      /* ERANGE is also reported for underflow,
+	 so test the value to distinguish overflow from that.  */
+      if (conversion_errno == ERANGE && !flag_traditional && pedantic
+	  && (REAL_VALUES_LESS (dconst1, real)
+	      || REAL_VALUES_LESS (real, dconstm1)))
+	warning ("floating point number exceeds range of 'double'");
+#endif
+
+      /* Create a node with determined type and value.  */
+      if (imag)
+	value = build_complex (NULL_TREE, convert (type, integer_zero_node),
+			       build_real (type, real));
+      else
+	value = build_real (type, real);
+    }
+  else
+    {
+      tree trad_type, ansi_type, type;
+      HOST_WIDE_INT high, low;
+      int spec_unsigned = 0;
+      int spec_long = 0;
+      int spec_long_long = 0;
+      int spec_imag = 0;
+      int suffix_lu = 0;
+      int warn = 0, i;
+
+      trad_type = ansi_type = type = NULL_TREE;
+      while (p < str + len)
+	{
+	  c = *p++;
+	  switch (c)
+	    {
+	    case 'u': case 'U':
+	      if (spec_unsigned)
+		error ("two 'u' suffixes on integer constant");
+	      else if (warn_traditional && !in_system_header
+		       && ! cpp_sys_macro_p (parse_in))
+		warning ("traditional C rejects the 'u' suffix");
+
+	      spec_unsigned = 1;
+	      if (spec_long)
+		suffix_lu = 1;
+	      break;
 
-	      if (objc_interface_decl)
+	    case 'l': case 'L':
+	      if (spec_long)
 		{
-		  value = CLASSNAME;
-		  yylval.ttype = objc_interface_decl;
+		  if (spec_long_long)
+		    error ("three 'l' suffixes on integer constant");
+		  else if (suffix_lu)
+		    error ("'lul' is not a valid integer suffix");
+		  else if (c != spec_long)
+		    error ("'Ll' and 'lL' are not valid integer suffixes");
+		  else if (pedantic && ! flag_isoc99
+			   && ! in_system_header && warn_long_long)
+		    pedwarn ("ISO C89 forbids long long integer constants");
+		  spec_long_long = 1;
 		}
-	    }
-	}
+	      spec_long = c;
+	      break;
 
-      break;
+	    case 'i': case 'I': case 'j': case 'J':
+	      if (spec_imag)
+		error ("more than one 'i' or 'j' suffix on integer constant");
+	      else if (pedantic)
+		pedwarn ("ISO C forbids imaginary numeric constants");
+	      spec_imag = 1;
+	      break;
 
-    case '0':  case '1':
-      {
-	int next_c;
-	/* Check first for common special case:  single-digit 0 or 1.  */
+	    default:
+	      ERROR ("invalid suffix on integer constant");
+	    }
+	}
 
-	next_c = GETC ();
-	UNGETC (next_c);	/* Always undo this lookahead.  */
-	if (!ISALNUM (next_c) && next_c != '.')
-	  {
-	    token_buffer[0] = (char)c,  token_buffer[1] = '\0';
-	    yylval.ttype = (c == '0') ? integer_zero_node : integer_one_node;
-	    value = CONSTANT;
-	    break;
-	  }
-	/*FALLTHRU*/
-      }
-    case '2':  case '3':  case '4':
-    case '5':  case '6':  case '7':  case '8':  case '9':
-    case '.':
-      {
-	int base = 10;
-	int count = 0;
-	int largest_digit = 0;
-	int numdigits = 0;
-	/* for multi-precision arithmetic,
-	   we actually store only HOST_BITS_PER_CHAR bits in each part.
-	   The number of parts is chosen so as to be sufficient to hold
-	   the enough bits to fit into the two HOST_WIDE_INTs that contain
-	   the integer value (this is always at least as many bits as are
-	   in a target `long long' value, but may be wider).  */
-#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
-	int parts[TOTAL_PARTS];
-	int overflow = 0;
-
-	enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS, AFTER_EXPON}
-	  floatflag = NOT_FLOAT;
-
-	for (count = 0; count < TOTAL_PARTS; count++)
-	  parts[count] = 0;
-
-	p = token_buffer;
-	*p++ = c;
-
-	if (c == '0')
-	  {
-	    *p++ = (c = GETC());
-	    if ((c == 'x') || (c == 'X'))
-	      {
-		base = 16;
-		*p++ = (c = GETC());
-	      }
-	    /* Leading 0 forces octal unless the 0 is the only digit.  */
-	    else if (c >= '0' && c <= '9')
-	      {
-		base = 8;
-		numdigits++;
-	      }
-	    else
-	      numdigits++;
-	  }
+      /* If the literal overflowed, pedwarn about it now.  */
+      if (overflow)
+	{
+	  warn = 1;
+	  pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
+	}
 
-	/* Read all the digits-and-decimal-points.  */
+      /* This is simplified by the fact that our constant
+	 is always positive.  */
 
-	while (c == '.'
-	       || (ISALNUM (c) && c != 'l' && c != 'L'
-		   && c != 'u' && c != 'U'
-		   && c != 'i' && c != 'I' && c != 'j' && c != 'J'
-		   && (floatflag == NOT_FLOAT || ((c != 'f') && (c != 'F')))))
-	  {
-	    if (c == '.')
-	      {
-		if (base == 16 && pedantic)
-		  error ("floating constant may not be in radix 16");
-		if (floatflag == TOO_MANY_POINTS)
-		  /* We have already emitted an error.  Don't need another.  */
-		  ;
-		else if (floatflag == AFTER_POINT || floatflag == AFTER_EXPON)
-		  {
-		    error ("malformed floating constant");
-		    floatflag = TOO_MANY_POINTS;
-		    /* Avoid another error from atof by forcing all characters
-		       from here on to be ignored.  */
-		    p[-1] = '\0';
-		  }
-		else
-		  floatflag = AFTER_POINT;
-
-		if (base == 8)
-		  base = 10;
-		*p++ = c = GETC();
-		/* Accept '.' as the start of a floating-point number
-		   only when it is followed by a digit.
-		   Otherwise, unread the following non-digit
-		   and use the '.' as a structural token.  */
-		if (p == token_buffer + 2 && !ISDIGIT (c))
-		  {
-		    if (c == '.')
-		      {
-			c = GETC();
-			if (c == '.')
-			  {
-			    *p++ = c;
-			    *p = 0;
-			    return ELLIPSIS;
-			  }
-			error ("parse error at `..'");
-		      }
-		    UNGETC (c);
-		    token_buffer[1] = 0;
-		    value = '.';
-		    goto done;
-		  }
-	      }
-	    else
-	      {
-		/* It is not a decimal point.
-		   It should be a digit (perhaps a hex digit).  */
-
-		if (ISDIGIT (c))
-		  {
-		    c = c - '0';
-		  }
-		else if (base <= 10)
-		  {
-		    if (c == 'e' || c == 'E')
-		      {
-			base = 10;
-			floatflag = AFTER_EXPON;
-			break;   /* start of exponent */
-		      }
-		    error ("nondigits in number and not hexadecimal");
-		    c = 0;
-		  }
-		else if (base == 16 && (c == 'p' || c == 'P'))
-		  {
-		    floatflag = AFTER_EXPON;
-		    break;   /* start of exponent */
-		  }
-		else if (c >= 'a')
-		  {
-		    c = c - 'a' + 10;
-		  }
-		else
-		  {
-		    c = c - 'A' + 10;
-		  }
-		if (c >= largest_digit)
-		  largest_digit = c;
-		numdigits++;
-
-		for (count = 0; count < TOTAL_PARTS; count++)
-		  {
-		    parts[count] *= base;
-		    if (count)
-		      {
-			parts[count]
-			  += (parts[count-1] >> HOST_BITS_PER_CHAR);
-			parts[count-1]
-			  &= (1 << HOST_BITS_PER_CHAR) - 1;
-		      }
-		    else
-		      parts[0] += c;
-		  }
-
-		/* If the extra highest-order part ever gets anything in it,
-		   the number is certainly too big.  */
-		if (parts[TOTAL_PARTS - 1] != 0)
-		  overflow = 1;
-
-		if (p >= token_buffer + maxtoken - 3)
-		  p = extend_token_buffer (p);
-		*p++ = (c = GETC());
-	      }
-	  }
+      high = low = 0;
 
-	if (numdigits == 0)
-	  error ("numeric constant with no digits");
+      for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
+	{
+	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
+					      / HOST_BITS_PER_CHAR)]
+		   << (i * HOST_BITS_PER_CHAR));
+	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
+	}
 
-	if (largest_digit >= base)
-	  error ("numeric constant contains digits beyond the radix");
+      value = build_int_2 (low, high);
+      TREE_TYPE (value) = long_long_unsigned_type_node;
 
-	/* Remove terminating char from the token buffer and delimit the string */
-	*--p = 0;
+      /* If warn_traditional, calculate both the ISO type and the
+	 traditional type, then see if they disagree.
+	 Otherwise, calculate only the type for the dialect in use.  */
+      if (warn_traditional || flag_traditional)
+	{
+	  /* Calculate the traditional type.  */
+	  /* Traditionally, any constant is signed; but if unsigned is
+	     specified explicitly, obey that.  Use the smallest size
+	     with the right number of bits, except for one special
+	     case with decimal constants.  */
+	  if (! spec_long && base != 10
+	      && int_fits_type_p (value, unsigned_type_node))
+	    trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
+	  /* A decimal constant must be long if it does not fit in
+	     type int.  I think this is independent of whether the
+	     constant is signed.  */
+	  else if (! spec_long && base == 10
+		   && int_fits_type_p (value, integer_type_node))
+	    trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
+	  else if (! spec_long_long)
+	    trad_type = (spec_unsigned
+			 ? long_unsigned_type_node
+			 : long_integer_type_node);
+	  else if (int_fits_type_p (value,
+				    spec_unsigned 
+				    ? long_long_unsigned_type_node
+				    : long_long_integer_type_node)) 
+	    trad_type = (spec_unsigned
+			 ? long_long_unsigned_type_node
+			 : long_long_integer_type_node);
+	  else
+	    trad_type = (spec_unsigned
+			 ? widest_unsigned_literal_type_node
+			 : widest_integer_literal_type_node);
+	}
+      if (warn_traditional || ! flag_traditional)
+	{
+	  /* Calculate the ISO type.  */
+	  if (! spec_long && ! spec_unsigned
+	      && int_fits_type_p (value, integer_type_node))
+	    ansi_type = integer_type_node;
+	  else if (! spec_long && (base != 10 || spec_unsigned)
+		   && int_fits_type_p (value, unsigned_type_node))
+	    ansi_type = unsigned_type_node;
+	  else if (! spec_unsigned && !spec_long_long
+		   && int_fits_type_p (value, long_integer_type_node))
+	    ansi_type = long_integer_type_node;
+	  else if (! spec_long_long
+		   && int_fits_type_p (value, long_unsigned_type_node))
+	    ansi_type = long_unsigned_type_node;
+	  else if (! spec_unsigned
+		   && int_fits_type_p (value, long_long_integer_type_node))
+	    ansi_type = long_long_integer_type_node;
+	  else if (int_fits_type_p (value, long_long_unsigned_type_node))
+	    ansi_type = long_long_unsigned_type_node;
+	  else if (! spec_unsigned
+		   && int_fits_type_p (value, widest_integer_literal_type_node))
+	    ansi_type = widest_integer_literal_type_node;
+	  else
+	    ansi_type = widest_unsigned_literal_type_node;
+	}
 
-	if (floatflag != NOT_FLOAT)
-	  {
-	    tree type = double_type_node;
-	    int imag = 0;
-	    int conversion_errno = 0;
-	    REAL_VALUE_TYPE value;
-	    struct pf_args args;
-
-	    /* Read explicit exponent if any, and put it in tokenbuf.  */
-
-	    if ((base == 10 && ((c == 'e') || (c == 'E')))
-		|| (base == 16 && (c == 'p' || c == 'P')))
-	      {
-		if (p >= token_buffer + maxtoken - 3)
-		  p = extend_token_buffer (p);
-		*p++ = c;
-		c = GETC();
-		if ((c == '+') || (c == '-'))
-		  {
-		    *p++ = c;
-		    c = GETC();
-		  }
-		/* Exponent is decimal, even if string is a hex float.  */
-		if (! ISDIGIT (c))
-		  error ("floating constant exponent has no digits");
-		while (ISDIGIT (c))
-		  {
-		    if (p >= token_buffer + maxtoken - 3)
-		      p = extend_token_buffer (p);
-		    *p++ = c;
-		    c = GETC();
-		  }
-	      }
-	    if (base == 16 && floatflag != AFTER_EXPON)
-	      error ("hexadecimal floating constant has no exponent");
-
-	    *p = 0;
-
-	    /* Setup input for parse_float() */
-	    args.base = base;
-	    args.p = p;
-	    args.c = c;
-	    args.imag = imag;
-	    args.type = type;
-	    args.conversion_errno = conversion_errno;
-
-	    /* Convert string to a double, checking for overflow.  */
-	    if (do_float_handler (parse_float, (PTR) &args))
-	      {
-		/* Receive output from parse_float() */
-		value = args.value;
-	      }
-	    else
-	      {
-		/* We got an exception from parse_float() */
-		error ("floating constant out of range");
-		value = dconst0;
-	      }
-
-	    /* Receive output from parse_float() */
-	    c = args.c;
-	    imag = args.imag;
-	    type = args.type;
-	    conversion_errno = args.conversion_errno;
-	    
-#ifdef ERANGE
-	    /* ERANGE is also reported for underflow,
-	       so test the value to distinguish overflow from that.  */
-	    if (conversion_errno == ERANGE && !flag_traditional && pedantic
-		&& (REAL_VALUES_LESS (dconst1, value)
-		    || REAL_VALUES_LESS (value, dconstm1)))
-	      warning ("floating point number exceeds range of `double'");
-#endif
+      type = flag_traditional ? trad_type : ansi_type;
 
-	    /* If the result is not a number, assume it must have been
-	       due to some error message above, so silently convert
-	       it to a zero.  */
-	    if (REAL_VALUE_ISNAN (value))
-	      value = dconst0;
+      /* We assume that constants specified in a non-decimal
+	 base are bit patterns, and that the programmer really
+	 meant what they wrote.  */
+      if (warn_traditional && !in_system_header
+	  && base == 10 && trad_type != ansi_type)
+	{
+	  if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
+	    warning ("width of integer constant changes with -traditional");
+	  else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
+	    warning ("integer constant is unsigned in ISO C, signed with -traditional");
+	  else
+	    warning ("width of integer constant may change on other systems with -traditional");
+	}
 
-	    /* Create a node with determined type and value.  */
-	    if (imag)
-	      yylval.ttype = build_complex (NULL_TREE,
-					    convert (type, integer_zero_node),
-					    build_real (type, value));
-	    else
-	      yylval.ttype = build_real (type, value);
-	  }
-	else
-	  {
-	    tree traditional_type, ansi_type, type;
-	    HOST_WIDE_INT high, low;
-	    int spec_unsigned = 0;
-	    int spec_long = 0;
-	    int spec_long_long = 0;
-	    int spec_imag = 0;
-	    int warn, i;
-
-	    traditional_type = ansi_type = type = NULL_TREE;
-	    while (1)
-	      {
-		if (c == 'u' || c == 'U')
-		  {
-		    if (spec_unsigned)
-		      error ("two `u's in integer constant");
-		    spec_unsigned = 1;
-		  }
-		else if (c == 'l' || c == 'L')
-		  {
-		    if (spec_long)
-		      {
-			if (spec_long_long)
-			  error ("three `l's in integer constant");
-			else if (pedantic && ! in_system_header && warn_long_long)
-			  pedwarn ("ANSI C forbids long long integer constants");
-			spec_long_long = 1;
-		      }
-		    spec_long = 1;
-		  }
-		else if (c == 'i' || c == 'j' || c == 'I' || c == 'J')
-		  {
-		    if (spec_imag)
-		      error ("more than one `i' or `j' in numeric constant");
-		    else if (pedantic)
-		      pedwarn ("ANSI C forbids imaginary numeric constants");
-		    spec_imag = 1;
-		  }
-		else
-		  break;
-		if (p >= token_buffer + maxtoken - 3)
-		  p = extend_token_buffer (p);
-		*p++ = c;
-		c = GETC();
-	      }
-
-	    /* If it won't fit in the host's representation for integers,
-	       then pedwarn. */
-
-	    warn = overflow;
-	    if (warn)
-	      pedwarn ("integer constant out of range");
-
-	    /* This is simplified by the fact that our constant
-	       is always positive.  */
-
-	    high = low = 0;
-
-	    for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
-	      {
-		high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
-						    / HOST_BITS_PER_CHAR)]
-			 << (i * HOST_BITS_PER_CHAR));
-		low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
-	      }
-
-	    yylval.ttype = build_int_2 (low, high);
-	    TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
-
-	    /* If warn_traditional, calculate both the ANSI type and the
-	       traditional type, then see if they disagree.
-	       Otherwise, calculate only the type for the dialect in use.  */
-	    if (warn_traditional || flag_traditional)
-	      {
-		/* Calculate the traditional type.  */
-		/* Traditionally, any constant is signed;
-		   but if unsigned is specified explicitly, obey that.
-		   Use the smallest size with the right number of bits,
-		   except for one special case with decimal constants.  */
-		if (! spec_long && base != 10
-		    && int_fits_type_p (yylval.ttype, unsigned_type_node))
-		  traditional_type = (spec_unsigned ? unsigned_type_node
-				      : integer_type_node);
-		/* A decimal constant must be long
-		   if it does not fit in type int.
-		   I think this is independent of whether
-		   the constant is signed.  */
-		else if (! spec_long && base == 10
-			 && int_fits_type_p (yylval.ttype, integer_type_node))
-		  traditional_type = (spec_unsigned ? unsigned_type_node
-				      : integer_type_node);
-		else if (! spec_long_long)
-		  traditional_type = (spec_unsigned ? long_unsigned_type_node
-				      : long_integer_type_node);
-		else
-		  traditional_type = (spec_unsigned
-				      ? long_long_unsigned_type_node
-				      : long_long_integer_type_node);
-	      }
-	    if (warn_traditional || ! flag_traditional)
-	      {
-		/* Calculate the ANSI type.  */
-		if (! spec_long && ! spec_unsigned
-		    && int_fits_type_p (yylval.ttype, integer_type_node))
-		  ansi_type = integer_type_node;
-		else if (! spec_long && (base != 10 || spec_unsigned)
-			 && int_fits_type_p (yylval.ttype, unsigned_type_node))
-		  ansi_type = unsigned_type_node;
-		else if (! spec_unsigned && !spec_long_long
-			 && int_fits_type_p (yylval.ttype, long_integer_type_node))
-		  ansi_type = long_integer_type_node;
-		else if (! spec_long_long
-			 && int_fits_type_p (yylval.ttype,
-					     long_unsigned_type_node))
-		  ansi_type = long_unsigned_type_node;
-		else if (! spec_unsigned
-			 && int_fits_type_p (yylval.ttype,
-					     long_long_integer_type_node))
-		  ansi_type = long_long_integer_type_node;
-		else
-		  ansi_type = long_long_unsigned_type_node;
-	      }
-
-	    type = flag_traditional ? traditional_type : ansi_type;
-
-	    if (warn_traditional && traditional_type != ansi_type)
-	      {
-		if (TYPE_PRECISION (traditional_type)
-		    != TYPE_PRECISION (ansi_type))
-		  warning ("width of integer constant changes with -traditional");
-		else if (TREE_UNSIGNED (traditional_type)
-			 != TREE_UNSIGNED (ansi_type))
-		  warning ("integer constant is unsigned in ANSI C, signed with -traditional");
-		else
-		  warning ("width of integer constant may change on other systems with -traditional");
-	      }
-
-	    if (pedantic && !flag_traditional && !spec_long_long && !warn
-		&& (TYPE_PRECISION (long_integer_type_node)
-		    < TYPE_PRECISION (type)))
-	      {
-		warn = 1;
-		pedwarn ("integer constant out of range");
-	      }
-
-	    if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
-	      warning ("decimal constant is so large that it is unsigned");
-
-	    if (spec_imag)
-	      {
-		if (TYPE_PRECISION (type)
-		    <= TYPE_PRECISION (integer_type_node))
-		  yylval.ttype
-		    = build_complex (NULL_TREE, integer_zero_node,
-				     convert (integer_type_node,
-					      yylval.ttype));
-		else
-		  error ("complex integer constant is too wide for `complex int'");
-	      }
-	    else if (flag_traditional && !int_fits_type_p (yylval.ttype, type))
-	      /* The traditional constant 0x80000000 is signed
-		 but doesn't fit in the range of int.
-		 This will change it to -0x80000000, which does fit.  */
-	      {
-		TREE_TYPE (yylval.ttype) = unsigned_type (type);
-		yylval.ttype = convert (type, yylval.ttype);
-		TREE_OVERFLOW (yylval.ttype)
-		  = TREE_CONSTANT_OVERFLOW (yylval.ttype) = 0;
-	      }
-	    else
-	      TREE_TYPE (yylval.ttype) = type;
-
-
-	    /* If it's still an integer (not a complex), and it doesn't
-	       fit in the type we choose for it, then pedwarn. */
-
-	    if (! warn
-		&& TREE_CODE (TREE_TYPE (yylval.ttype)) == INTEGER_TYPE
-		&& ! int_fits_type_p (yylval.ttype, TREE_TYPE (yylval.ttype)))
-	      pedwarn ("integer constant out of range");
-	  }
+      if (pedantic && !flag_traditional && (flag_isoc99 || !spec_long_long)
+	  && !warn
+	  && ((flag_isoc99
+	       ? TYPE_PRECISION (long_long_integer_type_node)
+	       : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
+	{
+	  warn = 1;
+	  pedwarn ("integer constant larger than the maximum value of %s",
+		   (flag_isoc99
+		    ? (TREE_UNSIGNED (type)
+		       ? _("an unsigned long long int")
+		       : _("a long long int"))
+		    : _("an unsigned long int")));
+	}
 
-	UNGETC (c);
-	*p = 0;
-
-	if (ISALNUM (c) || c == '.' || c == '_' || c == '$'
-	    || (!flag_traditional && (c == '-' || c == '+')
-		&& (p[-1] == 'e' || p[-1] == 'E')))
-	  error ("missing white space after number `%s'", token_buffer);
-
-	value = CONSTANT; break;
-      }
-
-    case '\'':
-    char_constant:
-      {
-	register int result = 0;
-	register int num_chars = 0;
-	int chars_seen = 0;
-	unsigned width = TYPE_PRECISION (char_type_node);
-	int max_chars;
-#ifdef MULTIBYTE_CHARS
-	int longest_char = local_mb_cur_max ();
-	(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
-#endif
+      if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
+	warning ("decimal constant is so large that it is unsigned");
 
-	max_chars = TYPE_PRECISION (integer_type_node) / width;
-	if (wide_flag)
-	  width = WCHAR_TYPE_SIZE;
+      if (spec_imag)
+	{
+	  if (TYPE_PRECISION (type)
+	      <= TYPE_PRECISION (integer_type_node))
+	    value = build_complex (NULL_TREE, integer_zero_node,
+				   convert (integer_type_node, value));
+	  else
+	    ERROR ("complex integer constant is too wide for 'complex int'");
+	}
+      else if (flag_traditional && !int_fits_type_p (value, type))
+	/* The traditional constant 0x80000000 is signed
+	   but doesn't fit in the range of int.
+	   This will change it to -0x80000000, which does fit.  */
+	{
+	  TREE_TYPE (value) = unsigned_type (type);
+	  value = convert (type, value);
+	  TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
+	}
+      else
+	TREE_TYPE (value) = type;
 
-	while (1)
-	  {
-	  tryagain:
-	    c = GETC();
+      /* If it's still an integer (not a complex), and it doesn't
+	 fit in the type we choose for it, then pedwarn.  */
 
-	    if (c == '\'' || c == EOF)
-	      break;
+      if (! warn
+	  && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
+	  && ! int_fits_type_p (value, TREE_TYPE (value)))
+	pedwarn ("integer constant is larger than the maximum value for its type");
+    }
 
-	    ++chars_seen;
-	    if (c == '\\')
-	      {
-		int ignore = 0;
-		c = readescape (&ignore);
-		if (ignore)
-		  goto tryagain;
-		if (width < HOST_BITS_PER_INT
-		    && (unsigned) c >= ((unsigned)1 << width))
-		  pedwarn ("escape sequence out of range for character");
-#ifdef MAP_CHARACTER
-		if (ISPRINT (c))
-		  c = MAP_CHARACTER (c);
-#endif
-	      }
-	    else if (c == '\n')
-	      {
-		if (pedantic)
-		  pedwarn ("ANSI C forbids newline in character constant");
-		lineno++;
-	      }
-	    else
-	      {
-#ifdef MULTIBYTE_CHARS
-		wchar_t wc;
-		int i;
-		int char_len = -1;
-		for (i = 1; i <= longest_char; ++i)
-		  {
-		    if (i > maxtoken - 4)
-		      extend_token_buffer (token_buffer);
-
-		    token_buffer[i] = c;
-		    char_len = local_mbtowc (& wc,
-					     token_buffer + 1,
-					     i);
-		    if (char_len != -1)
-		      break;
-		    c = GETC ();
-		  }
-		if (char_len > 1)
-		  {
-		    /* mbtowc sometimes needs an extra char before accepting */
-		    if (char_len < i)
-		      UNGETC (c);
-		    if (! wide_flag)
-		      {
-			/* Merge character into result; ignore excess chars.  */
-			for (i = 1; i <= char_len; ++i)
-			  {
-			    if (i > max_chars)
-			      break;
-			    if (width < HOST_BITS_PER_INT)
-			      result = (result << width)
-				| (token_buffer[i]
-				   & ((1 << width) - 1));
-			    else
-			      result = token_buffer[i];
-			  }
-			num_chars += char_len;
-			goto tryagain;
-		      }
-		    c = wc;
-		  }
-		else
-		  {
-		    if (char_len == -1)
-		      warning ("Ignoring invalid multibyte character");
-		    if (wide_flag)
-		      c = wc;
-#ifdef MAP_CHARACTER
-		    else
-		      c = MAP_CHARACTER (c);
-#endif
-		  }
-#else /* ! MULTIBYTE_CHARS */
-#ifdef MAP_CHARACTER
-		c = MAP_CHARACTER (c);
-#endif
-#endif /* ! MULTIBYTE_CHARS */
-	      }
-
-	    if (wide_flag)
-	      {
-		if (chars_seen == 1) /* only keep the first one */
-		  result = c;
-		goto tryagain;
-	      }
-
-	    /* Merge character into result; ignore excess chars.  */
-	    num_chars += (width / TYPE_PRECISION (char_type_node));
-	    if (num_chars < max_chars + 1)
-	      {
-		if (width < HOST_BITS_PER_INT)
-		  result = (result << width) | (c & ((1 << width) - 1));
-		else
-		  result = c;
-	      }
-	  }
+  if (p < str + len)
+    error ("missing white space after number '%.*s'", (int) (p - str), str);
 
-	if (c != '\'')
-	  error ("malformatted character constant");
-	else if (chars_seen == 0)
-	  error ("empty character constant");
-	else if (num_chars > max_chars)
-	  {
-	    num_chars = max_chars;
-	    error ("character constant too long");
-	  }
-	else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
-	  warning ("multi-character character constant");
+  return value;
 
-	/* If char type is signed, sign-extend the constant.  */
-	if (! wide_flag)
-	  {
-	    int num_bits = num_chars * width;
-	    if (num_bits == 0)
-	      /* We already got an error; avoid invalid shift.  */
-	      yylval.ttype = build_int_2 (0, 0);
-	    else if (TREE_UNSIGNED (char_type_node)
-		     || ((result >> (num_bits - 1)) & 1) == 0)
-	      yylval.ttype
-		= build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
-					 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
-			       0);
-	    else
-	      yylval.ttype
-		= build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
-					  >> (HOST_BITS_PER_WIDE_INT - num_bits)),
-			       -1);
-	    TREE_TYPE (yylval.ttype) = integer_type_node;
-	  }
-	else
-	  {
-	    yylval.ttype = build_int_2 (result, 0);
-	    TREE_TYPE (yylval.ttype) = wchar_type_node;
-	  }
+ syntax_error:
+  return integer_zero_node;
+}
 
-	value = CONSTANT;
-	break;
-      }
+static tree
+lex_string (str, len, wide)
+     const char *str;
+     unsigned int len;
+     int wide;
+{
+  tree value;
+  char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
+  char *q = buf;
+  const char *p = str, *limit = str + len;
+  unsigned int c;
+  unsigned width = wide ? WCHAR_TYPE_SIZE
+			: TYPE_PRECISION (char_type_node);
 
-    case '"':
-    string_constant:
-      {
-	unsigned width = wide_flag ? WCHAR_TYPE_SIZE
-	                           : TYPE_PRECISION (char_type_node);
 #ifdef MULTIBYTE_CHARS
-	int longest_char = local_mb_cur_max ();
-	(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+  /* Reset multibyte conversion state.  */
+  (void) local_mbtowc (NULL, NULL, 0);
 #endif
-	c = GETC ();
-	p = token_buffer + 1;
 
-	while (c != '"' && c >= 0)
-	  {
-	    if (c == '\\')
-	      {
-		int ignore = 0;
-		c = readescape (&ignore);
-		if (ignore)
-		  goto skipnewline;
-		if (width < HOST_BITS_PER_INT
-		    && (unsigned) c >= ((unsigned)1 << width))
-		  pedwarn ("escape sequence out of range for character");
-	      }
-	    else if (c == '\n')
-	      {
-		if (pedantic)
-		  pedwarn ("ANSI C forbids newline in string constant");
-		lineno++;
-	      }
-	    else
-	      {
+  while (p < limit)
+    {
 #ifdef MULTIBYTE_CHARS
-		wchar_t wc;
-		int i;
-		int char_len = -1;
-		for (i = 0; i < longest_char; ++i)
-		  {
-		    if (p + i >= token_buffer + maxtoken)
-		      p = extend_token_buffer (p);
-		    p[i] = c;
-
-		    char_len = local_mbtowc (& wc, p, i + 1);
-		    if (char_len != -1)
-		      break;
-		    c = GETC ();
-		  }
-		if (char_len == -1)
-		  warning ("Ignoring invalid multibyte character");
-		else
-		  {
-		    /* mbtowc sometimes needs an extra char before accepting */
-		    if (char_len <= i)
-		      UNGETC (c);
-		    if (! wide_flag)
-		      {
-			p += (i + 1);
-			c = GETC ();
-			continue;
-		      }
-		    c = wc;
-		  }
-#endif /* MULTIBYTE_CHARS */
-	      }
-
-	    /* Add this single character into the buffer either as a wchar_t
-	       or as a single byte.  */
-	    if (wide_flag)
-	      {
-		unsigned width = TYPE_PRECISION (char_type_node);
-		unsigned bytemask = (1 << width) - 1;
-		int byte;
-
-		if (p + WCHAR_BYTES > token_buffer + maxtoken)
-		  p = extend_token_buffer (p);
-
-		for (byte = 0; byte < WCHAR_BYTES; ++byte)
-		  {
-		    int value;
-		    if (byte >= (int) sizeof (c))
-		      value = 0;
-		    else
-		      value = (c >> (byte * width)) & bytemask;
-		    if (BYTES_BIG_ENDIAN)
-		      p[WCHAR_BYTES - byte - 1] = value;
-		    else
-		      p[byte] = value;
-		  }
-		p += WCHAR_BYTES;
-	      }
-	    else
-	      {
-		if (p >= token_buffer + maxtoken)
-		  p = extend_token_buffer (p);
-		*p++ = c;
-	      }
-
-	  skipnewline:
-	    c = GETC ();
-	  }
+      wchar_t wc;
+      int char_len;
 
-	/* Terminate the string value, either with a single byte zero
-	   or with a wide zero.  */
-	if (wide_flag)
-	  {
-	    if (p + WCHAR_BYTES > token_buffer + maxtoken)
-	      p = extend_token_buffer (p);
-	    bzero (p, WCHAR_BYTES);
-	    p += WCHAR_BYTES;
-	  }
-	else
-	  {
-	    if (p >= token_buffer + maxtoken)
-	      p = extend_token_buffer (p);
-	    *p++ = 0;
-	  }
-
-	if (c < 0)
-	  error ("Unterminated string constant");
-
-	/* We have read the entire constant.
-	   Construct a STRING_CST for the result.  */
-
-	if (wide_flag)
-	  {
-	    yylval.ttype = build_string (p - (token_buffer + 1),
-					 token_buffer + 1);
-	    TREE_TYPE (yylval.ttype) = wchar_array_type_node;
-	    value = STRING;
-	  }
-	else if (objc_flag)
-	  {
-	    /* Return an Objective-C @"..." constant string object.  */
-	    yylval.ttype = build_objc_string (p - (token_buffer + 1),
-					      token_buffer + 1);
-	    TREE_TYPE (yylval.ttype) = char_array_type_node;
-	    value = OBJC_STRING;
-	  }
-	else
-	  {
-	    yylval.ttype = build_string (p - (token_buffer + 1),
-					 token_buffer + 1);
-	    TREE_TYPE (yylval.ttype) = char_array_type_node;
-	    value = STRING;
-	  }
+      char_len = local_mbtowc (&wc, p, limit - p);
+      if (char_len == -1)
+	{
+	  warning ("ignoring invalid multibyte character");
+	  char_len = 1;
+	  c = *p++;
+	}
+      else
+	{
+	  p += char_len;
+	  c = wc;
+	}
+#else
+      c = *p++;
+#endif
 
-	break;
-      }
-
-    case '+':
-    case '-':
-    case '&':
-    case '|':
-    case ':':
-    case '<':
-    case '>':
-    case '*':
-    case '/':
-    case '%':
-    case '^':
-    case '!':
-    case '=':
-      {
-	register int c1;
-
-      combine:
-
-	switch (c)
-	  {
-	  case '+':
-	    yylval.code = PLUS_EXPR; break;
-	  case '-':
-	    yylval.code = MINUS_EXPR; break;
-	  case '&':
-	    yylval.code = BIT_AND_EXPR; break;
-	  case '|':
-	    yylval.code = BIT_IOR_EXPR; break;
-	  case '*':
-	    yylval.code = MULT_EXPR; break;
-	  case '/':
-	    yylval.code = TRUNC_DIV_EXPR; break;
-	  case '%':
-	    yylval.code = TRUNC_MOD_EXPR; break;
-	  case '^':
-	    yylval.code = BIT_XOR_EXPR; break;
-	  case LSHIFT:
-	    yylval.code = LSHIFT_EXPR; break;
-	  case RSHIFT:
-	    yylval.code = RSHIFT_EXPR; break;
-	  case '<':
-	    yylval.code = LT_EXPR; break;
-	  case '>':
-	    yylval.code = GT_EXPR; break;
-	  }
+      if (c == '\\' && !ignore_escape_flag)
+	{
+	  unsigned int mask;
 
-	token_buffer[1] = c1 = GETC();
-	token_buffer[2] = 0;
+	  if (width < HOST_BITS_PER_INT)
+	    mask = ((unsigned int) 1 << width) - 1;
+	  else
+	    mask = ~0;
+	  c = cpp_parse_escape (parse_in, (const unsigned char **) &p,
+				(const unsigned char *) limit,
+				mask, flag_traditional);
+	}
+	
+      /* Add this single character into the buffer either as a wchar_t
+	 or as a single byte.  */
+      if (wide)
+	{
+	  unsigned charwidth = TYPE_PRECISION (char_type_node);
+	  unsigned bytemask = (1 << charwidth) - 1;
+	  int byte;
 
-	if (c1 == '=')
-	  {
-	    switch (c)
-	      {
-	      case '<':
-		value = ARITHCOMPARE; yylval.code = LE_EXPR; goto done;
-	      case '>':
-		value = ARITHCOMPARE; yylval.code = GE_EXPR; goto done;
-	      case '!':
-		value = EQCOMPARE; yylval.code = NE_EXPR; goto done;
-	      case '=':
-		value = EQCOMPARE; yylval.code = EQ_EXPR; goto done;
-	      }
-	    value = ASSIGN; goto done;
-	  }
-	else if (c == c1)
-	  switch (c)
-	    {
-	    case '+':
-	      value = PLUSPLUS; goto done;
-	    case '-':
-	      value = MINUSMINUS; goto done;
-	    case '&':
-	      value = ANDAND; goto done;
-	    case '|':
-	      value = OROR; goto done;
-	    case '<':
-	      c = LSHIFT;
-	      goto combine;
-	    case '>':
-	      c = RSHIFT;
-	      goto combine;
-	    }
-	else
-	  switch (c)
+	  for (byte = 0; byte < WCHAR_BYTES; ++byte)
 	    {
-	    case '-':
-	      if (c1 == '>')
-		{ value = POINTSAT; goto done; }
-	      break;
-	    case ':':
-	      if (c1 == '>')
-		{ value = ']'; goto done; }
-	      break;
-	    case '<':
-	      if (c1 == '%')
-		{ value = '{'; indent_level++; goto done; }
-	      if (c1 == ':')
-		{ value = '['; goto done; }
-	      break;
-	    case '%':
-	      if (c1 == '>')
-		{ value = '}'; indent_level--; goto done; }
-	      break;
+	      int n;
+	      if (byte >= (int) sizeof (c))
+		n = 0;
+	      else
+		n = (c >> (byte * charwidth)) & bytemask;
+	      if (BYTES_BIG_ENDIAN)
+		q[WCHAR_BYTES - byte - 1] = n;
+	      else
+		q[byte] = n;
 	    }
-	UNGETC (c1);
-	token_buffer[1] = 0;
-
-	if ((c == '<') || (c == '>'))
-	  value = ARITHCOMPARE;
-	else value = c;
-	goto done;
-      }
-
-    case 0:
-      /* Don't make yyparse think this is eof.  */
-      value = 1;
-      break;
-
-    case '{':
-      indent_level++;
-      value = c;
-      break;
+	  q += WCHAR_BYTES;
+	}
+      else
+	{
+	  *q++ = c;
+	}
+    }
 
-    case '}':
-      indent_level--;
-      value = c;
-      break;
+  /* Terminate the string value, either with a single byte zero
+     or with a wide zero.  */
 
-    default:
-      value = c;
+  if (wide)
+    {
+      memset (q, 0, WCHAR_BYTES);
+      q += WCHAR_BYTES;
+    }
+  else
+    {
+      *q++ = '\0';
     }
 
-done:
-/*  yylloc.last_line = lineno; */
+  value = build_string (q - buf, buf);
 
+  if (wide)
+    TREE_TYPE (value) = wchar_array_type_node;
+  else
+    TREE_TYPE (value) = char_array_type_node;
   return value;
 }
 
-/* Sets the value of the 'yydebug' variable to VALUE.
-   This is a function so we don't have to have YYDEBUG defined
-   in order to build the compiler.  */
-
-void
-set_yydebug (value)
-     int value;
+/* Converts a (possibly wide) character constant token into a tree.
+
+   TOKEN is handed to cpp_interpret_charconst together with parse_in,
+   warn_multichar and flag_traditional.  That call returns the numeric
+   value of the constant and fills in chars_seen, which is used below
+   to tell single-character from multi-character constants.
+
+   For a CPP_WCHAR token the returned node is given the type
+   wchar_type_node and is always built with 0 as the second argument
+   of build_int_2.  For every other token type the second argument is
+   -1 when the value is negative and 0 otherwise (presumably the
+   high-order word of the constant, i.e. a sign extension; confirm
+   against build_int_2), and the type is selected as explained in the
+   body.
+
+   NOTE(review): the wide branch does not treat a negative value
+   specially; check whether that is intended.  */
+static tree
+lex_charconst (token)
+     const cpp_token *token;
 {
-#if YYDEBUG != 0
-  yydebug = value;
-#else
-  warning ("YYDEBUG not defined.");
-#endif
+  HOST_WIDE_INT result;
+  tree value;
+  unsigned int chars_seen;
+ 
+  result = cpp_interpret_charconst (parse_in, token, warn_multichar,
+ 				    flag_traditional, &chars_seen);
+  if (token->type == CPP_WCHAR)
+    {
+      value = build_int_2 (result, 0);
+      TREE_TYPE (value) = wchar_type_node;
+    }
+  else
+    {
+      if (result < 0)
+ 	value = build_int_2 (result, -1);
+      else
+ 	value = build_int_2 (result, 0);
+ 
+      /* In C, a character constant has type 'int'.
+ 	 In C++ it has type 'char', but multi-char charconsts have
+ 	 type 'int'.  Hence 'char' is chosen only when c_language is
+ 	 clk_cplusplus and chars_seen is at most 1; every other case
+ 	 gets integer_type_node.  */
+      if (c_language == clk_cplusplus && chars_seen <= 1)
+ 	TREE_TYPE (value) = char_type_node;
+      else
+ 	TREE_TYPE (value) = integer_type_node;
+    }
+ 
+  return value;
 }
author	obrien <obrien@FreeBSD.org>	2002-02-01 18:16:02 +0000
committer	obrien <obrien@FreeBSD.org>	2002-02-01 18:16:02 +0000
commit	c9ab9ae440a8066b2c2b85b157b1fdadcf09916a (patch)
tree	086d9d6c8fbd4fc8fe4495059332f66bc0f8d12b /contrib/gcc/c-lex.c
parent	2ecfd8bd04b63f335c1ec6295740a4bfd97a4fa6 (diff)
download	FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.zip FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.tar.gz