mirror of
https://github.com/xomboverlord/ldc.git
synced 2026-01-13 11:23:14 +01:00
Fixed problems with label collisions when using labels inside inline asm. LabelStatement is now easily reached given its Identifier, which should be useful elsewhere too. Enabled inline asm for building the lib/compiler/llvmdc runtime code, fixing branches out of asm makes this possible.
3065 lines
58 KiB
C
3065 lines
58 KiB
C
|
|
// Compiler implementation of the D programming language
|
|
// Copyright (c) 1999-2008 by Digital Mars
|
|
// All Rights Reserved
|
|
// written by Walter Bright
|
|
// http://www.digitalmars.com
|
|
// License for redistribution is by either the Artistic License
|
|
// in artistic.txt, or the GNU General Public License in gnu.txt.
|
|
// See the included readme.txt for details.
|
|
|
|
/* Lexical Analyzer */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <wchar.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <sys/time.h>
|
|
|
|
#ifdef IN_GCC
|
|
|
|
#include <time.h>
|
|
#include "mem.h"
|
|
|
|
#else
|
|
|
|
#if __GNUC__
|
|
#include <time.h>
|
|
#endif
|
|
|
|
#if IN_LLVM
|
|
#include "mem.h"
|
|
#elif _WIN32
|
|
#include "..\root\mem.h"
|
|
#else
|
|
#include "../root/mem.h"
|
|
#endif
|
|
#endif
|
|
|
|
#include "stringtable.h"
|
|
|
|
#include "lexer.h"
|
|
#include "utf.h"
|
|
#include "identifier.h"
|
|
#include "id.h"
|
|
#include "module.h"
|
|
|
|
#if _WIN32 && __DMC__
|
|
// from \dm\src\include\setlocal.h
|
|
extern "C" char * __cdecl __locale_decpoint;
|
|
#endif
|
|
|
|
extern int HtmlNamedEntity(unsigned char *p, int length);
|
|
|
|
#define LS 0x2028 // UTF line separator
|
|
#define PS 0x2029 // UTF paragraph separator
|
|
|
|
/********************************************
|
|
* Do our own char maps
|
|
*/
|
|
|
|
/* Per-byte classification table; each entry is a bitwise OR of the CM* flags
 * below. It is filled in by cmtable_init().
 */
static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // octal digit '0'..'7'
const int CMhex    = 0x2;       // hexadecimal digit
const int CMidchar = 0x4;       // alphanumeric or '_'

/* Each predicate returns the raw flag bit (non-zero when the class matches). */
inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Populate cmtable for every byte value. Flags are OR-ed in, so calling this
 * more than once leaves the table unchanged.
 */
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != entries; ++ch)
    {
        unsigned char flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;
        if (isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
            flags |= CMhex;
        if (ch == '_' || isalnum(ch))
            flags |= CMidchar;

        cmtable[ch] |= flags;
    }
}
|
|
|
|
|
|
/************************* Token **********************************************/
|
|
|
|
char *Token::tochars[TOKMAX];
|
|
|
|
void *Token::operator new(size_t size)
|
|
{ Token *t;
|
|
|
|
if (Lexer::freelist)
|
|
{
|
|
t = Lexer::freelist;
|
|
Lexer::freelist = t->next;
|
|
return t;
|
|
}
|
|
|
|
return ::operator new(size);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
void Token::print()
|
|
{
|
|
fprintf(stdmsg, "%s\n", toChars());
|
|
}
|
|
#endif
|
|
|
|
char *Token::toChars()
|
|
{ char *p;
|
|
static char buffer[3 + 3 * sizeof(value) + 1];
|
|
|
|
p = buffer;
|
|
switch (value)
|
|
{
|
|
case TOKint32v:
|
|
#if IN_GCC
|
|
sprintf(buffer,"%d",(d_int32)int64value);
|
|
#else
|
|
sprintf(buffer,"%d",int32value);
|
|
#endif
|
|
break;
|
|
|
|
case TOKuns32v:
|
|
case TOKcharv:
|
|
case TOKwcharv:
|
|
case TOKdcharv:
|
|
#if IN_GCC
|
|
sprintf(buffer,"%uU",(d_uns32)uns64value);
|
|
#else
|
|
sprintf(buffer,"%uU",uns32value);
|
|
#endif
|
|
break;
|
|
|
|
case TOKint64v:
|
|
sprintf(buffer,"%lldL",int64value);
|
|
break;
|
|
|
|
case TOKuns64v:
|
|
sprintf(buffer,"%lluUL",uns64value);
|
|
break;
|
|
|
|
#if IN_GCC
|
|
case TOKfloat32v:
|
|
case TOKfloat64v:
|
|
case TOKfloat80v:
|
|
float80value.format(buffer, sizeof(buffer));
|
|
break;
|
|
case TOKimaginary32v:
|
|
case TOKimaginary64v:
|
|
case TOKimaginary80v:
|
|
float80value.format(buffer, sizeof(buffer));
|
|
// %% buffer
|
|
strcat(buffer, "i");
|
|
break;
|
|
#else
|
|
case TOKfloat32v:
|
|
sprintf(buffer,"%Lgf", float80value);
|
|
break;
|
|
|
|
case TOKfloat64v:
|
|
sprintf(buffer,"%Lg", float80value);
|
|
break;
|
|
|
|
case TOKfloat80v:
|
|
sprintf(buffer,"%LgL", float80value);
|
|
break;
|
|
|
|
case TOKimaginary32v:
|
|
sprintf(buffer,"%Lgfi", float80value);
|
|
break;
|
|
|
|
case TOKimaginary64v:
|
|
sprintf(buffer,"%Lgi", float80value);
|
|
break;
|
|
|
|
case TOKimaginary80v:
|
|
sprintf(buffer,"%LgLi", float80value);
|
|
break;
|
|
#endif
|
|
|
|
case TOKstring:
|
|
#if CSTRINGS
|
|
p = string;
|
|
#else
|
|
{ OutBuffer buf;
|
|
|
|
buf.writeByte('"');
|
|
for (size_t i = 0; i < len; )
|
|
{ unsigned c;
|
|
|
|
utf_decodeChar((unsigned char *)ustring, len, &i, &c);
|
|
switch (c)
|
|
{
|
|
case 0:
|
|
break;
|
|
|
|
case '"':
|
|
case '\\':
|
|
buf.writeByte('\\');
|
|
default:
|
|
if (isprint(c))
|
|
buf.writeByte(c);
|
|
else if (c <= 0x7F)
|
|
buf.printf("\\x%02x", c);
|
|
else if (c <= 0xFFFF)
|
|
buf.printf("\\u%04x", c);
|
|
else
|
|
buf.printf("\\U%08x", c);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
buf.writeByte('"');
|
|
if (postfix)
|
|
buf.writeByte('"');
|
|
buf.writeByte(0);
|
|
p = (char *)buf.extractData();
|
|
}
|
|
#endif
|
|
break;
|
|
|
|
case TOKidentifier:
|
|
case TOKenum:
|
|
case TOKstruct:
|
|
case TOKimport:
|
|
CASE_BASIC_TYPES:
|
|
p = ident->toChars();
|
|
break;
|
|
|
|
default:
|
|
p = toChars(value);
|
|
break;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
char *Token::toChars(enum TOK value)
|
|
{ char *p;
|
|
static char buffer[3 + 3 * sizeof(value) + 1];
|
|
|
|
p = tochars[value];
|
|
if (!p)
|
|
{ sprintf(buffer,"TOK%d",value);
|
|
p = buffer;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/*************************** Lexer ********************************************/
|
|
|
|
Token *Lexer::freelist = NULL;
|
|
StringTable Lexer::stringtable;
|
|
OutBuffer Lexer::stringbuffer;
|
|
|
|
/* Set up a lexer over the bytes base[begoffset .. endoffset).
 *
 * mod            module used for locations and for deciding whether errors
 *                are printed (see Lexer::error)
 * doDocComment   non-zero to collect documentation comments
 * commentToken   non-zero to return comments as TOKcomment tokens
 *
 * If the text starts with "#!", that whole first line is skipped and the
 * line number is set to 2.
 */
Lexer::Lexer(Module *mod,
        unsigned char *base, unsigned begoffset, unsigned endoffset,
        int doDocComment, int commentToken)
    : loc(mod, 1)
{
    //printf("Lexer::Lexer(%p,%d)\n",base,length);
    //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
    memset(&token,0,sizeof(token));
    this->base = base;
    this->end  = base + endoffset;
    p = base + begoffset;
    this->mod = mod;
    this->doDocComment = doDocComment;
    this->anyToken = 0;
    this->commentToken = commentToken;
    //initKeywords();

    /* If first line starts with '#!', ignore the line
     */
    if (p[0] == '#' && p[1] =='!')
    {
        p += 2;
        for (bool lineDone = false; !lineDone; )
        {   unsigned char c = *p;

            switch (c)
            {
                case '\n':
                    p++;
                    lineDone = true;
                    break;

                case '\r':
                    p++;
                    if (*p == '\n')
                        p++;
                    lineDone = true;
                    break;

                case 0:
                case 0x1A:
                    lineDone = true;            // end of file: leave p on it
                    break;

                default:
                    if (c & 0x80)
                    {   unsigned u = decodeUTF();

                        if (u == PS || u == LS)
                            lineDone = true;
                    }
                    /* Step past this character. This now also happens for
                     * LS/PS, matching how scan() advances after decodeUTF();
                     * previously p stayed on the separator's last byte and
                     * the next scan() started in the middle of a sequence.
                     */
                    p++;
                    break;
            }
        }
        loc.linnum = 2;
    }
}
|
|
|
|
|
|
void Lexer::error(const char *format, ...)
|
|
{
|
|
if (mod && !global.gag)
|
|
{
|
|
char *p = loc.toChars();
|
|
if (*p)
|
|
fprintf(stdmsg, "%s: ", p);
|
|
mem.free(p);
|
|
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
vfprintf(stdmsg, format, ap);
|
|
va_end(ap);
|
|
|
|
fprintf(stdmsg, "\n");
|
|
fflush(stdmsg);
|
|
|
|
if (global.errors >= 20) // moderate blizzard of cascading messages
|
|
fatal();
|
|
}
|
|
global.errors++;
|
|
}
|
|
|
|
void Lexer::error(Loc loc, const char *format, ...)
|
|
{
|
|
if (mod && !global.gag)
|
|
{
|
|
char *p = loc.toChars();
|
|
if (*p)
|
|
fprintf(stdmsg, "%s: ", p);
|
|
mem.free(p);
|
|
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
vfprintf(stdmsg, format, ap);
|
|
va_end(ap);
|
|
|
|
fprintf(stdmsg, "\n");
|
|
fflush(stdmsg);
|
|
|
|
if (global.errors >= 20) // moderate blizzard of cascading messages
|
|
fatal();
|
|
}
|
|
global.errors++;
|
|
}
|
|
|
|
/* Advance to the next token and return its kind.
 *
 * If a look-ahead token is already queued behind the current one, it is
 * copied into `token` and its node is pushed onto the free list; otherwise
 * the next token is scanned directly into `token`.
 */
TOK Lexer::nextToken()
{
    Token *queued = token.next;

    if (!queued)
    {
        scan(&token);
    }
    else
    {
        memcpy(&token, queued, sizeof(Token));
        // Recycle the node that held the look-ahead token.
        queued->next = freelist;
        freelist = queued;
    }
    //token.print();
    return token.value;
}
|
|
|
|
/* Return the token following ct without consuming it. If it has not been
 * scanned yet, a new Token is allocated, scanned and linked behind ct.
 */
Token *Lexer::peek(Token *ct)
{
    if (ct->next)
        return ct->next;

    Token *ahead = new Token();

    scan(ahead);
    ahead->next = NULL;
    ct->next = ahead;
    return ahead;
}
|
|
|
|
/*********************************
|
|
* tk is on the opening (.
|
|
* Look ahead and return token that is past the closing ).
|
|
*/
|
|
|
|
/* Look ahead from tk until the parenthesis opened at tk is closed.
 *
 * Returns the token after the matching ')'. The search also stops, and
 * returns the stopping token itself, at end of file, at a ';' outside any
 * braces, or at a '}' that has no matching '{' within the look-ahead.
 */
Token *Lexer::peekPastParen(Token *tk)
{
    //printf("peekPastParen()\n");
    int parenDepth = 1;
    int braceDepth = 0;

    for (;;)
    {
        tk = peek(tk);
        //tk->print();
        switch (tk->value)
        {
            case TOKlparen:
                ++parenDepth;
                break;

            case TOKrparen:
                if (--parenDepth == 0)
                    return peek(tk);            // token past the closing ')'
                break;

            case TOKlcurly:
                ++braceDepth;
                break;

            case TOKrcurly:
                if (--braceDepth < 0)
                    return tk;                  // unbalanced '}'
                break;

            case TOKsemicolon:
                if (braceDepth == 0)
                    return tk;
                break;

            case TOKeof:
                return tk;

            default:
                break;
        }
    }
}
|
|
|
|
/**********************************
|
|
* Determine if string is a valid Identifier.
|
|
* Placed here because of commonality with Lexer functionality.
|
|
* Returns:
|
|
* 0 invalid
|
|
*/
|
|
|
|
int Lexer::isValidIdentifier(char *p)
|
|
{
|
|
size_t len;
|
|
size_t idx;
|
|
|
|
if (!p || !*p)
|
|
goto Linvalid;
|
|
|
|
if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
|
|
goto Linvalid;
|
|
|
|
len = strlen(p);
|
|
idx = 0;
|
|
while (p[idx])
|
|
{ dchar_t dc;
|
|
|
|
char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
|
|
if (q)
|
|
goto Linvalid;
|
|
|
|
if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
|
|
goto Linvalid;
|
|
}
|
|
return 1;
|
|
|
|
Linvalid:
|
|
return 0;
|
|
}
|
|
|
|
/****************************
|
|
* Turn next token in buffer into a token.
|
|
*/
|
|
|
|
/* Scan the next token starting at p and store it in *t.
 *
 * White space is skipped. Comments are skipped as well unless commentToken is
 * set, in which case each comment is returned as TOKcomment; when
 * doDocComment is set, documentation comments are handed to getDocComment().
 * On return p points just past the token, except at end of file where it
 * stays on the terminator.
 */
void Lexer::scan(Token *t)
{
    unsigned lastLine = loc.linnum;     // line on which this scan started
    unsigned linnum;                    // line on which a comment started

    t->blockComment = NULL;
    t->lineComment = NULL;
    while (1)
    {
        t->ptr = p;
        //printf("p = %p, *p = '%c'\n",p,*p);
        switch (*p)
        {
            case 0:
            case 0x1A:
                t->value = TOKeof;              // end of file
                return;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue;                       // skip white space

            case '\r':
                p++;
                if (*p != '\n')                 // if CR stands by itself
                    loc.linnum++;
                continue;                       // skip white space

            case '\n':
                p++;
                loc.linnum++;
                continue;                       // skip white space

            case '0':   case '1':   case '2':   case '3':   case '4':
            case '5':   case '6':   case '7':   case '8':   case '9':
                t->value = number(t);
                return;

#if CSTRINGS
            case '\'':
                t->value = charConstant(t, 0);
                return;

            case '"':
                t->value = stringConstant(t,0);
                return;

            case 'l':
            case 'L':
                if (p[1] == '\'')
                {
                    p++;
                    t->value = charConstant(t, 1);
                    return;
                }
                else if (p[1] == '"')
                {
                    p++;
                    t->value = stringConstant(t, 1);
                    return;
                }
#else
            case '\'':
                t->value = charConstant(t,0);
                return;

            case 'r':
                if (p[1] != '"')
                    goto case_ident;
                p++;
                // r"..." is lexed like a back-quoted string: fall through
            case '`':
                t->value = wysiwygStringConstant(t, *p);
                return;

            case 'x':
                if (p[1] != '"')
                    goto case_ident;
                p++;
                t->value = hexStringConstant(t);
                return;

#if V2
            case 'q':
                if (p[1] == '"')
                {
                    p++;
                    t->value = delimitedStringConstant(t);
                    return;
                }
                else if (p[1] == '{')
                {
                    p++;
                    t->value = tokenStringConstant(t);
                    return;
                }
                else
                    goto case_ident;
#endif

            case '"':
                t->value = escapeStringConstant(t,0);
                return;

            case '\\':                  // escaped string literal
            {   unsigned c;

                stringbuffer.reset();
                do
                {
                    p++;
                    // \u, \U and \& produce code points that are stored as
                    // UTF-8; all other escapes produce a single byte.
                    const bool asUTF8 = (*p == 'u' || *p == 'U' || *p == '&');

                    c = escapeSequence();
                    if (asUTF8)
                        stringbuffer.writeUTF8(c);
                    else
                        stringbuffer.writeByte(c);
                } while (*p == '\\');
                t->len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                t->postfix = 0;
                t->value = TOKstring;
                return;
            }

            case 'l':
            case 'L':
#endif
            case 'a':   case 'b':   case 'c':   case 'd':   case 'e':
            case 'f':   case 'g':   case 'h':   case 'i':   case 'j':
            case 'k':               case 'm':   case 'n':   case 'o':
#if V2
            case 'p':   /*case 'q': case 'r':*/ case 's':   case 't':
#else
            case 'p':   case 'q': /*case 'r':*/ case 's':   case 't':
#endif
            case 'u':   case 'v':   case 'w': /*case 'x':*/ case 'y':
            case 'z':
            case 'A':   case 'B':   case 'C':   case 'D':   case 'E':
            case 'F':   case 'G':   case 'H':   case 'I':   case 'J':
            case 'K':               case 'M':   case 'N':   case 'O':
            case 'P':   case 'Q':   case 'R':   case 'S':   case 'T':
            case 'U':   case 'V':   case 'W':   case 'X':   case 'Y':
            case 'Z':
            case '_':
            case_ident:
            {   unsigned char c;
                StringValue *sv;
                Identifier *id;

                // Consume the remaining identifier characters.
                do
                {
                    c = *++p;
                } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));

                // Look the spelling up, creating the Identifier on first use.
                sv = stringtable.update((char *)t->ptr, p - t->ptr);
                id = (Identifier *) sv->ptrvalue;
                if (!id)
                {   id = new Identifier(sv->lstring.string,TOKidentifier);
                    sv->ptrvalue = id;
                }
                t->ident = id;
                t->value = (enum TOK) id->value;
                anyToken = 1;
                if (*t->ptr == '_')     // if special identifier token
                {
                    static char date[11+1];
                    static char time[8+1];
                    static char timestamp[24+1];

                    if (!date[0])       // lazy evaluation
                    {   time_t now;
                        char *stamp;

                        ::time(&now);
                        stamp = ctime(&now);
                        assert(stamp);
                        sprintf(date, "%.6s %.4s", stamp + 4, stamp + 20);
                        sprintf(time, "%.8s", stamp + 11);
                        sprintf(timestamp, "%.24s", stamp);
                    }

                    if (mod && id == Id::FILE)
                    {
                        t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
                        goto Lstring;
                    }
                    else if (mod && id == Id::LINE)
                    {
                        t->value = TOKint64v;
                        t->uns64value = loc.linnum;
                    }
                    else if (id == Id::DATE)
                    {
                        t->ustring = (unsigned char *)date;
                        goto Lstring;
                    }
                    else if (id == Id::TIME)
                    {
                        t->ustring = (unsigned char *)time;
                        goto Lstring;
                    }
                    else if (id == Id::VENDOR)
                    {
                        t->ustring = (unsigned char *)"Digital Mars D";
                        goto Lstring;
                    }
                    else if (id == Id::TIMESTAMP)
                    {
                        t->ustring = (unsigned char *)timestamp;
                     Lstring:
                        // Common tail for every string-valued special token.
                        t->value = TOKstring;
                        t->postfix = 0;
                        t->len = strlen((char *)t->ustring);
                    }
                    else if (id == Id::VERSIONX)
                    {   unsigned major = 0;
                        unsigned minor = 0;

                        // Parse "<major>.<minor>" starting at the second
                        // character of global.version.
                        for (char *v = global.version + 1; 1; v++)
                        {
                            char ch = *v;

                            if (isdigit(ch))
                                minor = minor * 10 + ch - '0';
                            else if (ch == '.')
                            {   major = minor;
                                minor = 0;
                            }
                            else
                                break;
                        }
                        t->value = TOKint64v;
                        t->uns64value = major * 1000 + minor;
                    }
#if V2
                    else if (id == Id::EOFX)
                    {
                        t->value = TOKeof;
                        // Advance scanner to end of file
                        while (*p != 0 && *p != 0x1A)
                            p++;
                    }
#endif
                }
                //printf("t->value = %d\n",t->value);
                return;
            }

            case '/':
                p++;
                switch (*p)
                {
                    case '=':
                        p++;
                        t->value = TOKdivass;
                        return;

                    case '*':
                        p++;
                        linnum = loc.linnum;
                        while (1)
                        {
                            // Advance to the next '/', counting lines.
                            while (1)
                            {   unsigned char c = *p;

                                switch (c)
                                {
                                    case '/':
                                        break;

                                    case '\n':
                                        loc.linnum++;
                                        p++;
                                        continue;

                                    case '\r':
                                        p++;
                                        if (*p != '\n')
                                            loc.linnum++;
                                        continue;

                                    case 0:
                                    case 0x1A:
                                        error("unterminated /* */ comment");
                                        p = end;
                                        t->value = TOKeof;
                                        return;

                                    default:
                                        if (c & 0x80)
                                        {   unsigned u = decodeUTF();

                                            if (u == PS || u == LS)
                                                loc.linnum++;
                                        }
                                        p++;
                                        continue;
                                }
                                break;
                            }
                            p++;
                            // The comment ends at "*/", but the '*' must not
                            // be the one that opened it (as in "/*/").
                            if (p[-2] == '*' && p - 3 != t->ptr)
                                break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
                        {   // if /** but not /**/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;

                    case '/':           // do // style comments
                        linnum = loc.linnum;
                        while (1)
                        {   unsigned char c = *++p;

                            switch (c)
                            {
                                case '\n':
                                    break;

                                case '\r':
                                    if (p[1] == '\n')
                                        p++;
                                    break;

                                case 0:
                                case 0x1A:
                                    if (commentToken)
                                    {
                                        p = end;
                                        t->value = TOKcomment;
                                        return;
                                    }
                                    if (doDocComment && t->ptr[2] == '/')
                                        getDocComment(t, lastLine == linnum);
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();

                                        if (u == PS || u == LS)
                                            break;
                                    }
                                    continue;
                            }
                            break;
                        }

                        if (commentToken)
                        {
                            p++;
                            loc.linnum++;
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '/')
                            getDocComment(t, lastLine == linnum);

                        p++;
                        loc.linnum++;
                        continue;

                    case '+':
                    {   int nest;

                        linnum = loc.linnum;
                        p++;
                        nest = 1;
                        while (1)
                        {   unsigned char c = *p;

                            switch (c)
                            {
                                case '/':
                                    p++;
                                    if (*p == '+')
                                    {
                                        p++;
                                        nest++;         // nested "/+"
                                    }
                                    continue;

                                case '+':
                                    p++;
                                    if (*p == '/')
                                    {
                                        p++;
                                        if (--nest == 0)
                                            break;      // outermost "+/" reached
                                    }
                                    continue;

                                case '\r':
                                    p++;
                                    if (*p != '\n')
                                        loc.linnum++;
                                    continue;

                                case '\n':
                                    loc.linnum++;
                                    p++;
                                    continue;

                                case 0:
                                case 0x1A:
                                    error("unterminated /+ +/ comment");
                                    p = end;
                                    t->value = TOKeof;
                                    return;

                                default:
                                    if (c & 0x80)
                                    {   unsigned u = decodeUTF();

                                        if (u == PS || u == LS)
                                            loc.linnum++;
                                    }
                                    p++;
                                    continue;
                            }
                            break;
                        }
                        if (commentToken)
                        {
                            t->value = TOKcomment;
                            return;
                        }
                        if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
                        {   // if /++ but not /++/
                            getDocComment(t, lastLine == linnum);
                        }
                        continue;
                    }
                }
                t->value = TOKdiv;
                return;

            case '.':
                p++;
                if (isdigit(*p))
                {   /* Note that we don't allow ._1 and ._ as being
                     * valid floating point numbers.
                     */
                    p--;
                    t->value = inreal(t);
                }
                else if (p[0] == '.')
                {
                    if (p[1] == '.')
                    {   p += 2;
                        t->value = TOKdotdotdot;
                    }
                    else
                    {   p++;
                        t->value = TOKslice;
                    }
                }
                else
                    t->value = TOKdot;
                return;

            case '&':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKandass;
                }
                else if (*p == '&')
                {   p++;
                    t->value = TOKandand;
                }
                else
                    t->value = TOKand;
                return;

            case '|':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKorass;
                }
                else if (*p == '|')
                {   p++;
                    t->value = TOKoror;
                }
                else
                    t->value = TOKor;
                return;

            case '-':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKminass;
                }
#if 0
                else if (*p == '>')
                {   p++;
                    t->value = TOKarrow;
                }
#endif
                else if (*p == '-')
                {   p++;
                    t->value = TOKminusminus;
                }
                else
                    t->value = TOKmin;
                return;

            case '+':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKaddass;
                }
                else if (*p == '+')
                {   p++;
                    t->value = TOKplusplus;
                }
                else
                    t->value = TOKadd;
                return;

            case '<':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKle;                   // <=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshlass;           // <<=
                    }
                    else
                        t->value = TOKshl;              // <<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKleg;              // <>=
                    }
                    else
                        t->value = TOKlg;               // <>
                }
                else
                    t->value = TOKlt;                   // <
                return;

            case '>':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKge;                   // >=
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshrass;           // >>=
                    }
                    else if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKushrass;      // >>>=
                        }
                        else
                            t->value = TOKushr;         // >>>
                    }
                    else
                        t->value = TOKshr;              // >>
                }
                else
                    t->value = TOKgt;                   // >
                return;

            case '!':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKnotidentity;      // !==
                    }
                    else
                        t->value = TOKnotequal;         // !=
                }
                else if (*p == '<')
                {   p++;
                    if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKunord;        // !<>=
                        }
                        else
                            t->value = TOKue;           // !<>
                    }
                    else if (*p == '=')
                    {   p++;
                        t->value = TOKug;               // !<=
                    }
                    else
                        t->value = TOKuge;              // !<
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKul;               // !>=
                    }
                    else
                        t->value = TOKule;              // !>
                }
                else
                    t->value = TOKnot;                  // !
                return;

            case '=':
                p++;
                if (*p == '=')
                {   p++;
                    if (*p == '=' && global.params.Dversion == 1)
                    {   p++;
                        t->value = TOKidentity;         // ===
                    }
                    else
                        t->value = TOKequal;            // ==
                }
                else
                    t->value = TOKassign;               // =
                return;

            case '~':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKcatass;               // ~=
                }
                else
                    t->value = TOKtilde;                // ~
                return;

            // Single-character tokens.
#define SINGLE(c,tok) case c: p++; t->value = tok; return;

            SINGLE('(', TOKlparen)
            SINGLE(')', TOKrparen)
            SINGLE('[', TOKlbracket)
            SINGLE(']', TOKrbracket)
            SINGLE('{', TOKlcurly)
            SINGLE('}', TOKrcurly)
            SINGLE('?', TOKquestion)
            SINGLE(',', TOKcomma)
            SINGLE(';', TOKsemicolon)
            SINGLE(':', TOKcolon)
            SINGLE('$', TOKdollar)

#undef SINGLE

            // Tokens that are either c1 alone or c1 followed by c2.
#define DOUBLE(c1,tok1,c2,tok2)         \
            case c1:                    \
                p++;                    \
                if (*p == c2)           \
                {   p++;                \
                    t->value = tok2;    \
                }                       \
                else                    \
                    t->value = tok1;    \
                return;

            DOUBLE('*', TOKmul, '=', TOKmulass)
            DOUBLE('%', TOKmod, '=', TOKmodass)
            DOUBLE('^', TOKxor, '=', TOKxorass)

#undef DOUBLE

            case '#':
                p++;
                pragma();
                continue;

            default:
            {   unsigned char c = *p;

                if (c & 0x80)
                {   unsigned u = decodeUTF();

                    // Check for start of unicode identifier
                    if (isUniAlpha(u))
                        goto case_ident;

                    if (u == PS || u == LS)
                    {
                        loc.linnum++;
                        p++;
                        continue;
                    }
                }
                if (isprint(c))
                    error("unsupported char '%c'", c);
                else
                    error("unsupported char 0x%02x", c);
                p++;
                continue;
            }
        }
    }
}
|
|
|
|
/*******************************************
|
|
* Parse escape sequence.
|
|
*/
|
|
|
|
unsigned Lexer::escapeSequence()
|
|
{ unsigned c;
|
|
int n;
|
|
int ndigits;
|
|
|
|
c = *p;
|
|
switch (c)
|
|
{
|
|
case '\'':
|
|
case '"':
|
|
case '?':
|
|
case '\\':
|
|
Lconsume:
|
|
p++;
|
|
break;
|
|
|
|
case 'a': c = 7; goto Lconsume;
|
|
case 'b': c = 8; goto Lconsume;
|
|
case 'f': c = 12; goto Lconsume;
|
|
case 'n': c = 10; goto Lconsume;
|
|
case 'r': c = 13; goto Lconsume;
|
|
case 't': c = 9; goto Lconsume;
|
|
case 'v': c = 11; goto Lconsume;
|
|
|
|
case 'u':
|
|
ndigits = 4;
|
|
goto Lhex;
|
|
case 'U':
|
|
ndigits = 8;
|
|
goto Lhex;
|
|
case 'x':
|
|
ndigits = 2;
|
|
Lhex:
|
|
p++;
|
|
c = *p;
|
|
if (ishex(c))
|
|
{ unsigned v;
|
|
|
|
n = 0;
|
|
v = 0;
|
|
while (1)
|
|
{
|
|
if (isdigit(c))
|
|
c -= '0';
|
|
else if (islower(c))
|
|
c -= 'a' - 10;
|
|
else
|
|
c -= 'A' - 10;
|
|
v = v * 16 + c;
|
|
c = *++p;
|
|
if (++n == ndigits)
|
|
break;
|
|
if (!ishex(c))
|
|
{ error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
|
|
break;
|
|
}
|
|
}
|
|
if (ndigits != 2 && !utf_isValidDchar(v))
|
|
error("invalid UTF character \\U%08x", v);
|
|
c = v;
|
|
}
|
|
else
|
|
error("undefined escape hex sequence \\%c\n",c);
|
|
break;
|
|
|
|
case '&': // named character entity
|
|
for (unsigned char *idstart = ++p; 1; p++)
|
|
{
|
|
switch (*p)
|
|
{
|
|
case ';':
|
|
c = HtmlNamedEntity(idstart, p - idstart);
|
|
if (c == ~0)
|
|
{ error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
|
|
c = ' ';
|
|
}
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
if (isalpha(*p) ||
|
|
(p != idstart + 1 && isdigit(*p)))
|
|
continue;
|
|
error("unterminated named entity");
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 0:
|
|
case 0x1A: // end of file
|
|
c = '\\';
|
|
break;
|
|
|
|
default:
|
|
if (isoctal(c))
|
|
{ unsigned v;
|
|
|
|
n = 0;
|
|
v = 0;
|
|
do
|
|
{
|
|
v = v * 8 + (c - '0');
|
|
c = *++p;
|
|
} while (++n < 3 && isoctal(c));
|
|
c = v;
|
|
if (c > 0xFF)
|
|
error("0%03o is larger than a byte", c);
|
|
}
|
|
else
|
|
error("undefined escape sequence \\%c\n",c);
|
|
break;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
|
|
/* Lex a "what you see is what you get" string, in which no escape sequences
 * are recognized. On entry p is on the opening delimiter; tc is the
 * character that terminates the string. The result is stored in *t and
 * TOKstring is returned.
 *
 * A CR LF pair or a lone CR is stored as a single '\n'.
 */
TOK Lexer::wysiwygStringConstant(Token *t, int tc)
{   unsigned c;
    Loc start = loc;

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        switch (c)
        {
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n')
                    continue;   // ignore
                c = '\n';       // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case 0:
            case 0x1A:
                /* Step back onto the end-of-file character so the next scan
                 * sees it instead of reading past the end of the buffer
                 * (escapeStringConstant already does this).
                 */
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t->ustring = (unsigned char *)"";
                t->len = 0;
                t->postfix = 0;
                return TOKstring;

            case '"':
            case '`':
                if (c == tc)
                {
                    t->len = stringbuffer.offset;
                    stringbuffer.writeByte(0);
                    t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                    stringPostfix(t);
                    return TOKstring;
                }
                break;          // the other quote character is ordinary text

            default:
                if (c & 0x80)
                {   p--;
                    unsigned u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                        loc.linnum++;
                    stringbuffer.writeUTF8(u);
                    continue;
                }
                break;
        }
        stringbuffer.writeByte(c);
    }
}
|
|
|
|
/**************************************
|
|
* Lex hex strings:
|
|
* x"0A ae 34FE BD"
|
|
*/
|
|
|
|
/* Lex a hex string such as x"0A ae 34FE BD". On entry p is on the opening
 * '"'. Pairs of hex digits are combined into bytes; white space and line
 * breaks between digits are ignored. The result is stored in *t and
 * TOKstring is returned.
 */
TOK Lexer::hexStringConstant(Token *t)
{   unsigned c;
    Loc start = loc;
    unsigned n = 0;             // number of hex digits seen so far
    unsigned v = 0;             // value of the pending high nibble / byte

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        switch (c)
        {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                continue;                       // skip white space

            case '\r':
                if (*p == '\n')
                    continue;                   // ignore
                // Treat isolated '\r' as if it were a '\n'
            case '\n':
                loc.linnum++;
                continue;

            case 0:
            case 0x1A:
                /* Step back onto the end-of-file character so the next scan
                 * does not read past the end of the buffer.
                 */
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t->ustring = (unsigned char *)"";
                t->len = 0;
                t->postfix = 0;
                return TOKstring;

            case '"':
                if (n & 1)
                {   error("odd number (%d) of hex characters in hex string", n);
                    stringbuffer.writeByte(v);
                }
                t->len = stringbuffer.offset;
                stringbuffer.writeByte(0);
                t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
                memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
                stringPostfix(t);
                return TOKstring;

            default:
                if (c >= '0' && c <= '9')
                    c -= '0';
                else if (c >= 'a' && c <= 'f')
                    c -= 'a' - 10;
                else if (c >= 'A' && c <= 'F')
                    c -= 'A' - 10;
                else if (c & 0x80)
                {   p--;
                    unsigned u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                    {
                        /* A Unicode line break is white space like '\n';
                         * it must not be counted as a hex digit.
                         */
                        loc.linnum++;
                        continue;
                    }
                    error("non-hex character \\u%x", u);
                }
                else
                    error("non-hex character '%c'", c);

                if (n & 1)
                {   v = (v << 4) | c;           // second nibble completes a byte
                    stringbuffer.writeByte(v);
                }
                else
                    v = c;
                n++;
                break;
        }
    }
}
|
|
|
|
|
|
#if V2
|
|
/**************************************
|
|
* Lex delimited strings:
|
|
* q"(foo(xxx))" // "foo(xxx)"
|
|
* q"[foo(]" // "foo("
|
|
* q"/foo]/" // "foo]"
|
|
* q"HERE
|
|
* foo
|
|
* HERE" // "foo\n"
|
|
* Input:
|
|
* p is on the "
|
|
*/
|
|
|
|
/* Lex a delimited string (q"(...)", q"[...]", q"/.../" or a heredoc
 * q"IDENT ... IDENT"). On entry p is on the opening '"'. The result is
 * stored in *t and TOKstring is returned.
 */
TOK Lexer::delimitedStringConstant(Token *t)
{   unsigned c;
    Loc start = loc;
    unsigned delimleft = 0;     // opening delimiter, 0 until it has been read
    unsigned delimright = 0;    // closing delimiter
    unsigned nest = 1;          // 1 when the delimiters nest
    unsigned nestcount = 0;     // current nesting depth (set with delimleft)
    Identifier *hereid = NULL;  // heredoc identifier, if any
    unsigned blankrol = 0;      // rest of the heredoc opening line must be blank
    unsigned startline = 0;     // at the start of a line

    p++;
    stringbuffer.reset();
    while (1)
    {
        c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
            case '\n':
            Lnextline:
                loc.linnum++;
                startline = 1;
                if (blankrol)
                {   blankrol = 0;
                    continue;
                }
                if (hereid)
                {
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;

            case '\r':
                if (*p == '\n')
                    continue;   // ignore
                c = '\n';       // treat EndOfLine as \n character
                goto Lnextline;

            case 0:
            case 0x1A:
                /* Step back onto the end-of-file character so the next scan
                 * does not read past the end of the buffer.
                 */
                p--;
                goto Lerror;

            default:
                if (c & 0x80)
                {   p--;
                    c = decodeUTF();
                    p++;
                    if (c == PS || c == LS)
                        goto Lnextline;
                }
                break;
        }
        if (delimleft == 0)
        {   delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            /* c may be a decoded code point here; isalpha() is only defined
             * for values that fit in unsigned char, so test the range first.
             */
            else if ((c < 0x80 && (isalpha(c) || c == '_')) ||
                     (c >= 0x80 && isUniAlpha(c)))
            {   // Start of identifier; must be a heredoc
                Token tok;

                p--;
                scan(&tok);             // read in heredoc identifier
                if (tok.value != TOKidentifier)
                {   error("identifier expected for heredoc, not %s", tok.toChars());
                    delimright = c;
                }
                else
                {   hereid = tok.ident;
                    //printf("hereid = '%s'\n", hereid->toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {   delimright = c;
                nest = 0;
            }
        }
        else
        {
            if (blankrol)
            {   error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {   nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            if (startline && c < 0x80 && isalpha(c))
            {   Token tok;
                unsigned char *psave = p;

                p--;
                scan(&tok);             // read in possible heredoc identifier
                //printf("endid = '%s'\n", tok.ident->toChars());
                if (tok.value == TOKidentifier && tok.ident->equals(hereid))
                {   /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave;
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }

Ldone:
    if (*p == '"')
        p++;
    else
        error("delimited string must end in %c\"", delimright);
    t->len = stringbuffer.offset;
    stringbuffer.writeByte(0);
    t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
    memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
    stringPostfix(t);
    return TOKstring;

Lerror:
    error("unterminated string constant starting at %s", start.toChars());
    t->ustring = (unsigned char *)"";
    t->len = 0;
    t->postfix = 0;
    return TOKstring;
}
|
|
|
|
/**************************************
|
|
* Lex token strings:
|
|
* q{ foo(xxx) } // " foo(xxx) "
|
|
* q{foo(} // "foo("
|
|
* q{{foo}"}"} // "{foo}"}""
|
|
* Input:
|
|
* p is on the {
|
|
*/
|
|
|
|
/* Lex a token string q{ ... }. The text between the outermost braces is
 * tokenized only to find the matching '}' and is then copied verbatim into
 * *t. Returns TOKstring; if end of file is reached first, an error is
 * reported and an empty string is stored.
 */
TOK Lexer::tokenStringConstant(Token *t)
{
    unsigned depth = 1;                 // brace nesting level
    Loc start = loc;
    unsigned char *pstart = ++p;        // first byte after the opening '{'

    for (;;)
    {   Token tok;

        scan(&tok);
        if (tok.value == TOKeof)
        {
            error("unterminated token string constant starting at %s", start.toChars());
            t->ustring = (unsigned char *)"";
            t->len = 0;
            t->postfix = 0;
            return TOKstring;
        }

        if (tok.value == TOKlcurly)
            ++depth;
        else if (tok.value == TOKrcurly && --depth == 0)
            break;
    }

    // p is now just past the closing '}', which is not part of the string.
    t->len = p - 1 - pstart;
    t->ustring = (unsigned char *)mem.malloc(t->len + 1);
    memcpy(t->ustring, pstart, t->len);
    t->ustring[t->len] = 0;
    stringPostfix(t);
    return TOKstring;
}
|
|
|
|
#endif
|
|
|
|
|
|
/**************************************
|
|
*/
|
|
|
|
TOK Lexer::escapeStringConstant(Token *t, int wide)
{
    /* Lex a double-quoted string literal with escape sequences into t.
     * The byte at p on entry is skipped; the text is accumulated in
     * stringbuffer and copied into freshly allocated memory when the
     * closing '"' is found.
     * The 'wide' parameter is not used by this function.
     * Returns TOKstring in every case; on end of file an error is reported
     * and t is given an empty string.
     */
    Loc start = loc;

    p++;
    stringbuffer.reset();
    for (;;)
    {
        unsigned ch = *p++;

        if (ch == '"')
        {
            // Closing quote: the length excludes the terminating 0 byte
            t->len = stringbuffer.offset;
            stringbuffer.writeByte(0);
            t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
            memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
            stringPostfix(t);
            return TOKstring;
        }

        if (ch == 0 || ch == 0x1A)
        {
            // End of file: step back so p stays on the terminator
            p--;
            error("unterminated string constant starting at %s", start.toChars());
            t->ustring = (unsigned char *)"";
            t->len = 0;
            t->postfix = 0;
            return TOKstring;
        }

        if (ch == '\\')
        {
            // \u, \U and \& escapes are stored UTF-8 encoded; any other
            // escape is stored as a single byte. The kind must be noted
            // before escapeSequence() moves p.
            int asUTF8 = (*p == 'u' || *p == 'U' || *p == '&');

            ch = escapeSequence();
            if (asUTF8)
                stringbuffer.writeUTF8(ch);
            else
                stringbuffer.writeByte(ch);
            continue;
        }

        if (ch & 0x80)
        {
            // Multi-byte UTF-8 sequence: decode it as a whole
            p--;
            ch = decodeUTF();
            if (ch == LS || ch == PS)
            {
                ch = '\n';
                loc.linnum++;
            }
            p++;
            stringbuffer.writeUTF8(ch);
            continue;
        }

        if (ch == '\r')
        {
            if (*p == '\n')
                continue;           // \r\n: drop the \r, the \n is handled next
            ch = '\n';              // treat a lone \r as a \n character
            loc.linnum++;
        }
        else if (ch == '\n')
        {
            loc.linnum++;
        }

        stringbuffer.writeByte(ch);
    }
}
|
|
|
|
/**************************************
|
|
*/
|
|
|
|
TOK Lexer::charConstant(Token *t, int wide)
{
    /* Lex a single-quoted character literal into t->uns64value.
     * The byte at p on entry is skipped.
     * The 'wide' parameter is not used by this function.
     * Returns:
     *  TOKcharv, TOKwcharv or TOKdcharv depending on the character or
     *  escape that was read. On error "unterminated character constant"
     *  is reported and the token kind decided so far is returned.
     */
    unsigned c;
    TOK tk = TOKcharv;

    //printf("Lexer::charConstant\n");
    p++;
    c = *p++;
    switch (c)
    {
        case '\\':
            switch (*p)
            {
                case 'u':
                    t->uns64value = escapeSequence();
                    tk = TOKwcharv;
                    break;

                case 'U':
                case '&':
                    t->uns64value = escapeSequence();
                    tk = TOKdcharv;
                    break;

                default:
                    t->uns64value = escapeSequence();
                    break;
            }
            break;

        case 0:
        case 0x1A:
            // End of file: step back so that p stays on the terminator,
            // as escapeStringConstant() does. Otherwise later scanning
            // would start beyond the end-of-file byte.
            p--;
            goto Lunterminated;

        case '\n':
        L1:
            loc.linnum++;
            // fall through
        case '\r':
        case '\'':
        Lunterminated:
            error("unterminated character constant");
            t->uns64value = '?';    // give the token a defined value
            return tk;

        default:
            if (c & 0x80)
            {
                // Multi-byte UTF-8 sequence: decode it as a whole
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                    tk = TOKwcharv;
                else
                    tk = TOKdcharv;
            }
            t->uns64value = c;
            break;
    }

    // The character must be followed directly by the closing quote
    if (*p != '\'')
    {
        error("unterminated character constant");
        return tk;
    }
    p++;
    return tk;
}
|
|
|
|
/***************************************
|
|
* Get postfix of string literal.
|
|
*/
|
|
|
|
void Lexer::stringPostfix(Token *t)
{
    /* If the byte at p is one of the string postfix letters 'c', 'w' or
     * 'd', record it in t->postfix and step over it; otherwise set
     * t->postfix to 0 and leave p alone.
     */
    unsigned char suffix = *p;

    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t->postfix = suffix;
        p++;
    }
    else
    {
        t->postfix = 0;
    }
}
|
|
|
|
/***************************************
|
|
* Read \u or \U unicode sequence
|
|
* Input:
|
|
* u 'u' or 'U'
|
|
*/
|
|
|
|
#if 0
|
|
unsigned Lexer::wchar(unsigned u)
|
|
{
|
|
unsigned value;
|
|
unsigned n;
|
|
unsigned char c;
|
|
unsigned nchars;
|
|
|
|
nchars = (u == 'U') ? 8 : 4;
|
|
value = 0;
|
|
for (n = 0; 1; n++)
|
|
{
|
|
++p;
|
|
if (n == nchars)
|
|
break;
|
|
c = *p;
|
|
if (!ishex(c))
|
|
{ error("\\%c sequence must be followed by %d hex characters", u, nchars);
|
|
break;
|
|
}
|
|
if (isdigit(c))
|
|
c -= '0';
|
|
else if (islower(c))
|
|
c -= 'a' - 10;
|
|
else
|
|
c -= 'A' - 10;
|
|
value <<= 4;
|
|
value |= c;
|
|
}
|
|
return value;
|
|
}
|
|
#endif
|
|
|
|
/**************************************
|
|
* Read in a number.
|
|
* If it's an integer, store it in tok.TKutok.Vlong.
|
|
* integers can be decimal, octal or hex
|
|
* Handle the suffixes U, UL, LU, L, etc.
|
|
* If it's double, store it in tok.TKutok.Vdouble.
|
|
* Returns:
|
|
* TKnum
|
|
* TKdouble,...
|
|
*/
|
|
|
|
TOK Lexer::number(Token *t)
{
    /* Lex an integer literal starting at p.
     * The digits are collected into stringbuffer by a small state machine.
     * As soon as the text turns out to be a floating point literal, p is
     * reset to the start and the work is handed to inreal().
     * Otherwise the collected digits are converted, the U/u/L/l suffixes
     * are consumed, the value is stored in t->uns64value, and the token
     * kind is chosen from the suffix flags and the magnitude.
     */

    // We use a state machine to collect numbers
    enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
        STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
        STATE_hexh, STATE_error };
    enum STATE state;

    enum FLAGS
    {   FLAGS_decimal  = 1,             // decimal
        FLAGS_unsigned = 2,             // u or U suffix
        FLAGS_long     = 4,             // l or L suffix
    };
    enum FLAGS flags = FLAGS_decimal;

    int base;                           // only consulted by the strtoull() path
    unsigned c;
    unsigned char *start;
    TOK result;

    //printf("Lexer::number()\n");
    state = STATE_initial;
    base = 0;
    stringbuffer.reset();
    start = p;
    while (1)
    {
        c = *p;
        switch (state)
        {
            case STATE_initial:         // opening state
                if (c == '0')
                    state = STATE_0;
                else
                    state = STATE_decimal;
                break;

            case STATE_0:
                // A leading 0 means the literal is not plain decimal
                flags = (FLAGS) (flags & ~FLAGS_decimal);
                switch (c)
                {
#if ZEROH
                    case 'H':                   // 0h
                    case 'h':
                        goto hexh;
#endif
                    case 'X':
                    case 'x':
                        state = STATE_hex0;
                        break;

                    case '.':
                        if (p[1] == '.')        // .. is a separate token
                            goto done;
                        // fall through
                    case 'i':
                    case 'f':
                    case 'F':
                        goto real;
#if ZEROH
                    case 'E':
                    case 'e':
                        goto case_hex;
#endif
                    case 'B':
                    case 'b':
                        state = STATE_binary0;
                        break;

                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        state = STATE_octal;
                        break;

#if ZEROH
                    case '8': case '9': case 'A':
                    case 'C': case 'D': case 'F':
                    case 'a': case 'c': case 'd': case 'f':
                    case_hex:
                        state = STATE_hexh;
                        break;
#endif
                    case '_':
                        state = STATE_octal;
                        p++;
                        continue;

                    case 'L':
                        if (p[1] == 'i')
                            goto real;
                        goto done;

                    default:
                        goto done;
                }
                break;

            case STATE_decimal:         // reading decimal number
                if (!isdigit(c))
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    else if (c == 'i' || c == 'f' || c == 'F' ||
                             c == 'e' || c == 'E')
                    {
            real:       // It's a real number. Back up and rescan as a real
                        p = start;
                        return inreal(t);
                    }
                    else if (c == 'L' && p[1] == 'i')
                        goto real;
                    goto done;
                }
                break;

            case STATE_hex0:            // reading hex number
            case STATE_hex:
                if (!ishex(c))
                {
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    if (c == 'P' || c == 'p' || c == 'i')
                        goto real;
                    if (state == STATE_hex0)
                        error("Hex digit expected, not '%c'", c);
                    goto done;
                }
                state = STATE_hex;
                break;

#if ZEROH
            hexh:
                state = STATE_hexh;
                // fall through
            case STATE_hexh:            // parse numbers like 0FFh
                if (!ishex(c))
                {
                    if (c == 'H' || c == 'h')
                    {
                        p++;
                        base = 16;
                        goto done;
                    }
                    else
                    {
                        // Check for something like 1E3 or 0E24
                        if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
                            memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
                            goto real;
                        error("Hex digit expected, not '%c'", c);
                        goto done;
                    }
                }
                break;
#endif

            case STATE_octal:           // reading octal number
            case STATE_octale:          // reading octal number with non-octal digits
                if (!isoctal(c))
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (c == '.' && p[1] != '.')
                        goto real;
                    if (c == 'i')
                        goto real;
                    if (isdigit(c))
                    {
                        // 8 or 9: keep collecting, the error is issued at done
                        state = STATE_octale;
                    }
                    else
                        goto done;
                }
                break;

            case STATE_binary0:         // starting binary number
            case STATE_binary:          // reading binary number
                if (c != '0' && c != '1')
                {
#if ZEROH
                    if (ishex(c)
                        || c == 'H' || c == 'h'
                       )
                        goto hexh;
#endif
                    if (c == '_')               // ignore embedded _
                    {   p++;
                        continue;
                    }
                    if (state == STATE_binary0)
                    {   error("binary digit expected");
                        state = STATE_error;
                        break;
                    }
                    else
                        goto done;
                }
                state = STATE_binary;
                break;

            case STATE_error:           // for error recovery
                if (!isdigit(c))        // scan until non-digit
                    goto done;
                break;

            default:
                assert(0);
        }
        stringbuffer.writeByte(c);
        p++;
    }
done:
    stringbuffer.writeByte(0);          // terminate string
    if (state == STATE_octale)
        error("Octal digit expected");

    uinteger_t n;                       // unsigned >=64 bit integer type

    // Fast path: a single digit (offset 2 = one digit plus the terminator)
    if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
        n = stringbuffer.data[0] - '0';
    else
    {
        // Convert string to integer
#if __DMC__
        errno = 0;
        n = strtoull((char *)stringbuffer.data,NULL,base);
        if (errno == ERANGE)
            error("integer overflow");
#else
        // Not everybody implements strtoull()
        char *digits = (char *)stringbuffer.data;
        int r = 10, d;

        // Pick the radix from the prefix: 0x / 0b / leading 0
        if (*digits == '0')
        {
            if (digits[1] == 'x' || digits[1] == 'X')
                digits += 2, r = 16;
            else if (digits[1] == 'b' || digits[1] == 'B')
                digits += 2, r = 2;
            else if (isdigit(digits[1]))
                digits += 1, r = 8;
        }

        n = 0;
        while (1)
        {
            if (*digits >= '0' && *digits <= '9')
                d = *digits - '0';
            else if (*digits >= 'a' && *digits <= 'z')
                d = *digits - 'a' + 10;
            else if (*digits >= 'A' && *digits <= 'Z')
                d = *digits - 'A' + 10;
            else
                break;
            if (d >= r)
                break;

            // Detect wrap-around in both the multiplication and the
            // addition. Comparing the new value with the old one is not
            // enough: a wrapped product can still be larger than n.
            uinteger_t n2 = n * r;
            if (n2 / r != n || n2 + d < n2)
            {
                error ("integer overflow");
                break;
            }

            n = n2 + d;
            digits++;
        }
#endif
        if (sizeof(n) > 8 &&
            n > 0xFFFFFFFFFFFFFFFFULL)  // if n needs more than 64 bits
            error("integer overflow");
    }

    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    while (1)
    {   unsigned char f;

        switch (*p)
        {   case 'U':
            case 'u':
                f = FLAGS_unsigned;
                goto L1;

            case 'l':
                // The leading "1 ||" makes this error unconditional
                if (1 || !global.params.useDeprecated)
                    error("'l' suffix is deprecated, use 'L' instead");
                // fall through
            case 'L':
                f = FLAGS_long;
            L1:
                p++;
                if (flags & f)          // same suffix given twice
                    error("unrecognized token");
                flags = (FLAGS) (flags | f);
                continue;
            default:
                break;
        }
        break;
    }

    switch (flags)
    {
        case 0:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000LL)
                result = TOKuns64v;
            else if (n & 0xFFFFFFFF00000000LL)
                result = TOKint64v;
            else if (n & 0x80000000)
                result = TOKuns32v;
            else
                result = TOKint32v;
            break;

        case FLAGS_decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000LL)
            {       error("signed integer overflow");
                    result = TOKuns64v;
            }
            else if (n & 0xFFFFFFFF80000000LL)
                result = TOKint64v;
            else
                result = TOKint32v;
            break;

        case FLAGS_unsigned:
        case FLAGS_decimal | FLAGS_unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000LL)
                result = TOKuns64v;
            else
                result = TOKuns32v;
            break;

        case FLAGS_decimal | FLAGS_long:
            if (n & 0x8000000000000000LL)
            {       error("signed integer overflow");
                    result = TOKuns64v;
            }
            else
                result = TOKint64v;
            break;

        case FLAGS_long:
            if (n & 0x8000000000000000LL)
                result = TOKuns64v;
            else
                result = TOKint64v;
            break;

        case FLAGS_unsigned | FLAGS_long:
        case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
            result = TOKuns64v;
            break;

        default:
            #ifdef DEBUG
                printf("%x\n",flags);
            #endif
            assert(0);
    }
    t->uns64value = n;
    return result;
}
|
|
|
|
/**************************************
|
|
* Read in characters, converting them to real.
|
|
* Bugs:
|
|
* Exponent overflow not detected.
|
|
* Too much requested precision is not detected.
|
|
*/
|
|
|
|
TOK Lexer::inreal(Token *t)
#ifdef __DMC__
__in
{
    assert(*p == '.' || isdigit(*p));
}
__out (result)
{
    switch (result)
    {
        case TOKfloat32v:
        case TOKfloat64v:
        case TOKfloat80v:
        case TOKimaginary32v:
        case TOKimaginary64v:
        case TOKimaginary80v:
            break;

        default:
            assert(0);
    }
}
__body
#endif /* __DMC__ */
{
    /* Lex a floating point literal starting at p.
     * The characters of the literal are gathered into stringbuffer by a
     * numbered state machine (the states are numeric because several
     * transitions are done with state++). The text is then converted,
     * the value is stored in t->float80value, and the f/F/l/L and i/I
     * suffixes select the returned token kind.
     */
    int state = 0;
    char isHex = 0;             // is this a hexadecimal-floating-constant?
    TOK result;

    //printf("Lexer::inreal()\n");
    stringbuffer.reset();
    for (;;)
    {
        // Get next char from input
        unsigned ch = *p++;
        int keep = 1;           // cleared when ch must not be stored

        // Re-run the switch on the same character after a state change
        for (;;)
        {
            switch (state)
            {
                case 0:                 // opening state
                    state = (ch == '0') ? 9
                          : (ch == '.') ? 3
                          : 1;
                    break;

                case 9:                 // character after a leading 0
                    state = 1;
                    if (ch == 'X' || ch == 'x')
                    {
                        isHex++;
                        break;
                    }
                    // fall through
                case 1:                 // digits to left of .
                case 3:                 // digits to right of .
                case 7:                 // continuing exponent digits
                    if (isdigit(ch) || (isHex && isxdigit(ch)))
                        break;
                    if (ch == '_')
                    {
                        keep = 0;       // ignore embedded '_'
                        break;
                    }
                    state++;
                    continue;

                case 2:                 // no more digits to left of .
                    if (ch == '.')
                    {
                        state++;
                        break;
                    }
                    // fall through
                case 4:                 // no more digits to right of .
                    if ((ch == 'E' || ch == 'e') ||
                        (isHex && (ch == 'P' || ch == 'p')))
                    {
                        state = 5;
                        isHex = 0;      // exponent is always decimal
                        break;
                    }
                    if (isHex)
                        error("binary-exponent-part required");
                    goto done;

                case 5:                 // looking immediately to right of E
                    state++;
                    if (ch == '-' || ch == '+')
                        break;
                    // fall through
                case 6:                 // 1st exponent digit expected
                    if (!isdigit(ch))
                        error("exponent expected");
                    state++;
                    break;

                case 8:                 // past end of exponent digits
                    goto done;
            }
            break;
        }

        if (keep)
            stringbuffer.writeByte(ch);
    }

done:
    p--;                        // the last character read is not part of the literal

    stringbuffer.writeByte(0);

#if _WIN32 && __DMC__
    char *save = __locale_decpoint;
    __locale_decpoint = ".";
#endif
#ifdef IN_GCC
    t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
#else
    t->float80value = strtold((char *)stringbuffer.data, NULL);
#endif

    // errno is cleared here, so only the conversions below can set the
    // ERANGE that is tested at the end.
    errno = 0;

    unsigned char suffix = *p;
    if (suffix == 'F' || suffix == 'f')
    {
#ifdef IN_GCC
        real_t::parse((char *)stringbuffer.data, real_t::Float);
#else
        strtof((char *)stringbuffer.data, NULL);
#endif
        result = TOKfloat32v;
        p++;
    }
    else if (suffix == 'l' || suffix == 'L')
    {
        if (suffix == 'l' && !global.params.useDeprecated)
            error("'l' suffix is deprecated, use 'L' instead");
        result = TOKfloat80v;
        p++;
    }
    else
    {
        // No size suffix
#ifdef IN_GCC
        real_t::parse((char *)stringbuffer.data, real_t::Double);
#else
        strtod((char *)stringbuffer.data, NULL);
#endif
        result = TOKfloat64v;
    }

    // An i or I suffix turns the result into the matching imaginary kind
    if (*p == 'i' || *p == 'I')
    {
        if (!global.params.useDeprecated && *p == 'I')
            error("'I' suffix is deprecated, use 'i' instead");
        p++;

        if (result == TOKfloat32v)
            result = TOKimaginary32v;
        else if (result == TOKfloat64v)
            result = TOKimaginary64v;
        else if (result == TOKfloat80v)
            result = TOKimaginary80v;
    }
#if _WIN32 && __DMC__
    __locale_decpoint = save;
#endif
    if (errno == ERANGE)
        error("number is not representable");
    return result;
}
|
|
|
|
/*********************************************
|
|
* Do pragma.
|
|
* Currently, the only pragma supported is:
|
|
* #line linnum [filespec]
|
|
*/
|
|
|
|
void Lexer::pragma()
|
|
{
|
|
Token tok;
|
|
int linnum;
|
|
char *filespec = NULL;
|
|
Loc loc = this->loc;
|
|
|
|
scan(&tok);
|
|
if (tok.value != TOKidentifier || tok.ident != Id::line)
|
|
goto Lerr;
|
|
|
|
scan(&tok);
|
|
if (tok.value == TOKint32v || tok.value == TOKint64v)
|
|
linnum = tok.uns64value - 1;
|
|
else
|
|
goto Lerr;
|
|
|
|
while (1)
|
|
{
|
|
switch (*p)
|
|
{
|
|
case 0:
|
|
case 0x1A:
|
|
case '\n':
|
|
Lnewline:
|
|
this->loc.linnum = linnum;
|
|
if (filespec)
|
|
this->loc.filename = filespec;
|
|
return;
|
|
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n')
|
|
{ p--;
|
|
goto Lnewline;
|
|
}
|
|
continue;
|
|
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
p++;
|
|
continue; // skip white space
|
|
|
|
case '_':
|
|
if (mod && memcmp(p, "__FILE__", 8) == 0)
|
|
{
|
|
p += 8;
|
|
filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
|
|
}
|
|
continue;
|
|
|
|
case '"':
|
|
if (filespec)
|
|
goto Lerr;
|
|
stringbuffer.reset();
|
|
p++;
|
|
while (1)
|
|
{ unsigned c;
|
|
|
|
c = *p;
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
case '\r':
|
|
case 0:
|
|
case 0x1A:
|
|
goto Lerr;
|
|
|
|
case '"':
|
|
stringbuffer.writeByte(0);
|
|
filespec = mem.strdup((char *)stringbuffer.data);
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
if (c & 0x80)
|
|
{ unsigned u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
goto Lerr;
|
|
}
|
|
stringbuffer.writeByte(c);
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
continue;
|
|
|
|
default:
|
|
if (*p & 0x80)
|
|
{ unsigned u = decodeUTF();
|
|
if (u == PS || u == LS)
|
|
goto Lnewline;
|
|
}
|
|
goto Lerr;
|
|
}
|
|
}
|
|
|
|
Lerr:
|
|
error(loc, "#line integer [\"filespec\"]\\n expected");
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Decode UTF character.
|
|
* Issue error messages for invalid sequences.
|
|
* Return decoded character, advance p to last character in UTF sequence.
|
|
*/
|
|
|
|
unsigned Lexer::decodeUTF()
|
|
{
|
|
dchar_t u;
|
|
unsigned char c;
|
|
unsigned char *s = p;
|
|
size_t len;
|
|
size_t idx;
|
|
char *msg;
|
|
|
|
c = *s;
|
|
assert(c & 0x80);
|
|
|
|
// Check length of remaining string up to 6 UTF-8 characters
|
|
for (len = 1; len < 6 && s[len]; len++)
|
|
;
|
|
|
|
idx = 0;
|
|
msg = utf_decodeChar(s, len, &idx, &u);
|
|
p += idx - 1;
|
|
if (msg)
|
|
{
|
|
error("%s", msg);
|
|
}
|
|
return u;
|
|
}
|
|
|
|
|
|
/***************************************************
|
|
* Parse doc comment embedded between t->ptr and p.
|
|
* Remove trailing blanks and tabs from lines.
|
|
* Replace all newlines with \n.
|
|
* Remove leading comment character from each line.
|
|
* Decide if it's a lineComment or a blockComment.
|
|
* Append to previous one for this token.
|
|
*/
|
|
|
|
void Lexer::getDocComment(Token *t, unsigned lineComment)
{
    /* Extract the text of the doc comment lying between t->ptr and p and
     * attach it to t.
     * t->ptr[2] is the comment character ('*', '+' or '/'); the text
     * starts at t->ptr + 3. Runs of the comment character at the start
     * and (except for '/') at the end are dropped, every kind of line
     * break becomes '\n', blanks and tabs before a line break are
     * trimmed, and a comment character leading a line is removed.
     * The result is stored in t->lineComment when lineComment and
     * anyToken are both set, otherwise in t->blockComment, combined with
     * any comment already stored there.
     */
    OutBuffer buf;
    unsigned char ct = t->ptr[2];
    unsigned char *q = t->ptr + 3;      // start of comment text
    unsigned char *qend = p;
    int linestart = 0;

    // Block comments end in a two-byte terminator that is not text
    if (ct == '*' || ct == '+')
        qend -= 2;

    /* Scan over initial row of ****'s or ++++'s or ////'s
     */
    while (q < qend && *q == ct)
        q++;

    /* Remove trailing row of ****'s or ++++'s
     */
    if (ct != '/')
    {
        while (q < qend && qend[-1] == ct)
            qend--;
    }

    for (; q < qend; q++)
    {
        unsigned char c = *q;
        int newline = 0;

        if (c == '*' || c == '+')
        {
            if (linestart && c == ct)
            {
                linestart = 0;
                /* Trim preceding whitespace up to preceding \n
                 */
                while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
                    buf.offset--;
                continue;
            }
            // otherwise copied as is; linestart is left unchanged
        }
        else if (c == ' ' || c == '\t')
        {
            // copied as is; linestart is left unchanged
        }
        else if (c == '\r')
        {
            if (q[1] == '\n')
                continue;               // skip the \r
            newline = 1;
        }
        else if (c == '\n')
        {
            newline = 1;
        }
        else if (c == 226 && q[1] == 128 && (q[2] == 168 || q[2] == 169))
        {
            // LS or PS, three bytes long
            q += 2;
            newline = 1;
        }
        else
        {
            linestart = 0;
        }

        if (newline)
        {
            c = '\n';                   // replace all newlines with \n
            linestart = 1;

            /* Trim trailing whitespace
             */
            while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
                buf.offset--;
        }
        buf.writeByte(c);
    }

    // Always end with a newline
    if (!buf.offset || buf.data[buf.offset - 1] != '\n')
        buf.writeByte('\n');

    buf.writeByte(0);

    // It's a line comment if the start of the doc comment comes
    // after other non-whitespace on the same line.
    unsigned char **dc;
    if (lineComment && anyToken)
        dc = &t->lineComment;
    else
        dc = &t->blockComment;

    // Combine with previous doc comment, if any
    if (*dc)
        *dc = combineComments(*dc, (unsigned char *)buf.data);
    else
        *dc = (unsigned char *)buf.extractData();
}
|
|
|
|
/********************************************
|
|
* Combine two document comments into one.
|
|
*/
|
|
|
|
unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
|
|
{
|
|
unsigned char *c = c2;
|
|
|
|
if (c1)
|
|
{ c = c1;
|
|
if (c2)
|
|
{ size_t len1 = strlen((char *)c1);
|
|
size_t len2 = strlen((char *)c2);
|
|
|
|
c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
|
|
memcpy(c, c1, len1);
|
|
c[len1] = '\n';
|
|
memcpy(c + len1 + 1, c2, len2);
|
|
c[len1 + 1 + len2] = 0;
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/********************************************
|
|
* Create an identifier in the string table.
|
|
*/
|
|
|
|
Identifier *Lexer::idPool(const char *s)
{
    /* Return the Identifier stored in the string table for s, creating
     * it as a TOKidentifier and storing it first if the entry has none.
     */
    StringValue *entry = stringtable.update(s, strlen(s));

    if (entry->ptrvalue)
        return (Identifier *) entry->ptrvalue;

    Identifier *fresh = new Identifier(entry->lstring.string, TOKidentifier);
    entry->ptrvalue = fresh;
    return fresh;
}
|
|
|
|
/*********************************************
|
|
* Create a unique identifier using the prefix s.
|
|
*/
|
|
|
|
Identifier *Lexer::uniqueId(const char *s, int num)
{
    /* Build the identifier named by s immediately followed by num in
     * decimal, and return it from the identifier pool.
     */
    char buffer[32];

    // The prefix, 3 characters per byte of num and the terminator must fit
    assert(strlen(s) + sizeof(num) * 3 + 1 <= sizeof(buffer));
    sprintf(buffer, "%s%d", s, num);
    return idPool(buffer);
}
|
|
|
|
Identifier *Lexer::uniqueId(const char *s)
{
    /* Same as uniqueId(s, num), with num taken from a counter that is
     * incremented on every call; the first call uses 1.
     */
    static int counter = 0;

    counter += 1;
    return uniqueId(s, counter);
}
|
|
|
|
/****************************************
|
|
*/
|
|
|
|
struct Keyword
|
|
{ char *name;
|
|
enum TOK value;
|
|
};
|
|
|
|
/* Table of keywords and their token values.
 * The order of the entries matters: Lexer::initKeywords() leaves out the
 * last two entries when global.params.Dversion is 1.
 */
static Keyword keywords[] =
{
//  { "",               TOK             },

    { "this",           TOKthis         },
    { "super",          TOKsuper        },
    { "assert",         TOKassert       },
    { "null",           TOKnull         },
    { "true",           TOKtrue         },
    { "false",          TOKfalse        },
    { "cast",           TOKcast         },
    { "new",            TOKnew          },
    { "delete",         TOKdelete       },
    { "throw",          TOKthrow        },
    { "module",         TOKmodule       },
    { "pragma",         TOKpragma       },
    { "typeof",         TOKtypeof       },
    { "typeid",         TOKtypeid       },

    { "template",       TOKtemplate     },

    // Basic types
    { "void",           TOKvoid         },
    { "byte",           TOKint8         },
    { "ubyte",          TOKuns8         },
    { "short",          TOKint16        },
    { "ushort",         TOKuns16        },
    { "int",            TOKint32        },
    { "uint",           TOKuns32        },
    { "long",           TOKint64        },
    { "ulong",          TOKuns64        },
    { "cent",           TOKcent         },
    { "ucent",          TOKucent        },
    { "float",          TOKfloat32      },
    { "double",         TOKfloat64      },
    { "real",           TOKfloat80      },

    { "bool",           TOKbool         },
    { "char",           TOKchar         },
    { "wchar",          TOKwchar        },
    { "dchar",          TOKdchar        },

    { "ifloat",         TOKimaginary32  },
    { "idouble",        TOKimaginary64  },
    { "ireal",          TOKimaginary80  },

    { "cfloat",         TOKcomplex32    },
    { "cdouble",        TOKcomplex64    },
    { "creal",          TOKcomplex80    },

    { "delegate",       TOKdelegate     },
    { "function",       TOKfunction     },

    // Statements
    { "is",             TOKis           },
    { "if",             TOKif           },
    { "else",           TOKelse         },
    { "while",          TOKwhile        },
    { "for",            TOKfor          },
    { "do",             TOKdo           },
    { "switch",         TOKswitch       },
    { "case",           TOKcase         },
    { "default",        TOKdefault      },
    { "break",          TOKbreak        },
    { "continue",       TOKcontinue     },
    { "synchronized",   TOKsynchronized },
    { "return",         TOKreturn       },
    { "goto",           TOKgoto         },
    { "try",            TOKtry          },
    { "catch",          TOKcatch        },
    { "finally",        TOKfinally      },
    { "with",           TOKwith         },
    { "asm",            TOKasm          },
    { "foreach",        TOKforeach      },
    { "foreach_reverse", TOKforeach_reverse },
    { "scope",          TOKscope        },

    // Declarations and attributes
    { "struct",         TOKstruct       },
    { "class",          TOKclass        },
    { "interface",      TOKinterface    },
    { "union",          TOKunion        },
    { "enum",           TOKenum         },
    { "import",         TOKimport       },
    { "mixin",          TOKmixin        },
    { "static",         TOKstatic       },
    { "final",          TOKfinal        },
    { "const",          TOKconst        },
    { "typedef",        TOKtypedef      },
    { "alias",          TOKalias        },
    { "override",       TOKoverride     },
    { "abstract",       TOKabstract     },
    { "volatile",       TOKvolatile     },
    { "debug",          TOKdebug        },
    { "deprecated",     TOKdeprecated   },
    { "in",             TOKin           },
    { "out",            TOKout          },
    { "inout",          TOKinout        },
    { "lazy",           TOKlazy         },
    { "auto",           TOKauto         },

    { "align",          TOKalign        },
    { "extern",         TOKextern       },
    { "private",        TOKprivate      },
    { "package",        TOKpackage      },
    { "protected",      TOKprotected    },
    { "public",         TOKpublic       },
    { "export",         TOKexport       },

    { "body",           TOKbody         },
    { "invariant",      TOKinvariant    },
    { "unittest",       TOKunittest     },
    { "version",        TOKversion      },
    //{ "manifest",     TOKmanifest     },

    // Added after 1.0
    { "ref",            TOKref          },
    { "macro",          TOKmacro        },
#if V2
    { "pure",           TOKpure         },
    { "nothrow",        TOKnothrow      },
    { "__traits",       TOKtraits       },
    { "__overloadset",  TOKoverloadset  },
#endif
};
|
|
|
|
int Token::isKeyword()
|
|
{
|
|
for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
|
|
{
|
|
if (keywords[u].value == value)
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void Lexer::initKeywords()
|
|
{ StringValue *sv;
|
|
unsigned u;
|
|
enum TOK v;
|
|
unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
|
|
|
|
if (global.params.Dversion == 1)
|
|
nkeywords -= 2;
|
|
|
|
cmtable_init();
|
|
|
|
for (u = 0; u < nkeywords; u++)
|
|
{ char *s;
|
|
|
|
//printf("keyword[%d] = '%s'\n",u, keywords[u].name);
|
|
s = keywords[u].name;
|
|
v = keywords[u].value;
|
|
sv = stringtable.insert(s, strlen(s));
|
|
sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
|
|
|
|
//printf("tochars[%d] = '%s'\n",v, s);
|
|
Token::tochars[v] = s;
|
|
}
|
|
|
|
Token::tochars[TOKeof] = "EOF";
|
|
Token::tochars[TOKlcurly] = "{";
|
|
Token::tochars[TOKrcurly] = "}";
|
|
Token::tochars[TOKlparen] = "(";
|
|
Token::tochars[TOKrparen] = ")";
|
|
Token::tochars[TOKlbracket] = "[";
|
|
Token::tochars[TOKrbracket] = "]";
|
|
Token::tochars[TOKsemicolon] = ";";
|
|
Token::tochars[TOKcolon] = ":";
|
|
Token::tochars[TOKcomma] = ",";
|
|
Token::tochars[TOKdot] = ".";
|
|
Token::tochars[TOKxor] = "^";
|
|
Token::tochars[TOKxorass] = "^=";
|
|
Token::tochars[TOKassign] = "=";
|
|
Token::tochars[TOKconstruct] = "=";
|
|
#if V2
|
|
Token::tochars[TOKblit] = "=";
|
|
#endif
|
|
Token::tochars[TOKlt] = "<";
|
|
Token::tochars[TOKgt] = ">";
|
|
Token::tochars[TOKle] = "<=";
|
|
Token::tochars[TOKge] = ">=";
|
|
Token::tochars[TOKequal] = "==";
|
|
Token::tochars[TOKnotequal] = "!=";
|
|
Token::tochars[TOKnotidentity] = "!is";
|
|
Token::tochars[TOKtobool] = "!!";
|
|
|
|
Token::tochars[TOKunord] = "!<>=";
|
|
Token::tochars[TOKue] = "!<>";
|
|
Token::tochars[TOKlg] = "<>";
|
|
Token::tochars[TOKleg] = "<>=";
|
|
Token::tochars[TOKule] = "!>";
|
|
Token::tochars[TOKul] = "!>=";
|
|
Token::tochars[TOKuge] = "!<";
|
|
Token::tochars[TOKug] = "!<=";
|
|
|
|
Token::tochars[TOKnot] = "!";
|
|
Token::tochars[TOKtobool] = "!!";
|
|
Token::tochars[TOKshl] = "<<";
|
|
Token::tochars[TOKshr] = ">>";
|
|
Token::tochars[TOKushr] = ">>>";
|
|
Token::tochars[TOKadd] = "+";
|
|
Token::tochars[TOKmin] = "-";
|
|
Token::tochars[TOKmul] = "*";
|
|
Token::tochars[TOKdiv] = "/";
|
|
Token::tochars[TOKmod] = "%";
|
|
Token::tochars[TOKslice] = "..";
|
|
Token::tochars[TOKdotdotdot] = "...";
|
|
Token::tochars[TOKand] = "&";
|
|
Token::tochars[TOKandand] = "&&";
|
|
Token::tochars[TOKor] = "|";
|
|
Token::tochars[TOKoror] = "||";
|
|
Token::tochars[TOKarray] = "[]";
|
|
Token::tochars[TOKindex] = "[i]";
|
|
Token::tochars[TOKaddress] = "&";
|
|
Token::tochars[TOKstar] = "*";
|
|
Token::tochars[TOKtilde] = "~";
|
|
Token::tochars[TOKdollar] = "$";
|
|
Token::tochars[TOKcast] = "cast";
|
|
Token::tochars[TOKplusplus] = "++";
|
|
Token::tochars[TOKminusminus] = "--";
|
|
Token::tochars[TOKtype] = "type";
|
|
Token::tochars[TOKquestion] = "?";
|
|
Token::tochars[TOKneg] = "-";
|
|
Token::tochars[TOKuadd] = "+";
|
|
Token::tochars[TOKvar] = "var";
|
|
Token::tochars[TOKaddass] = "+=";
|
|
Token::tochars[TOKminass] = "-=";
|
|
Token::tochars[TOKmulass] = "*=";
|
|
Token::tochars[TOKdivass] = "/=";
|
|
Token::tochars[TOKmodass] = "%=";
|
|
Token::tochars[TOKshlass] = "<<=";
|
|
Token::tochars[TOKshrass] = ">>=";
|
|
Token::tochars[TOKushrass] = ">>>=";
|
|
Token::tochars[TOKandass] = "&=";
|
|
Token::tochars[TOKorass] = "|=";
|
|
Token::tochars[TOKcatass] = "~=";
|
|
Token::tochars[TOKcat] = "~";
|
|
Token::tochars[TOKcall] = "call";
|
|
Token::tochars[TOKidentity] = "is";
|
|
Token::tochars[TOKnotidentity] = "!is";
|
|
|
|
Token::tochars[TOKorass] = "|=";
|
|
Token::tochars[TOKidentifier] = "identifier";
|
|
|
|
// For debugging
|
|
Token::tochars[TOKdotexp] = "dotexp";
|
|
Token::tochars[TOKdotti] = "dotti";
|
|
Token::tochars[TOKdotvar] = "dotvar";
|
|
Token::tochars[TOKdottype] = "dottype";
|
|
Token::tochars[TOKsymoff] = "symoff";
|
|
Token::tochars[TOKtypedot] = "typedot";
|
|
Token::tochars[TOKarraylength] = "arraylength";
|
|
Token::tochars[TOKarrayliteral] = "arrayliteral";
|
|
Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
|
|
Token::tochars[TOKstructliteral] = "structliteral";
|
|
Token::tochars[TOKstring] = "string";
|
|
Token::tochars[TOKdsymbol] = "symbol";
|
|
Token::tochars[TOKtuple] = "tuple";
|
|
Token::tochars[TOKdeclaration] = "declaration";
|
|
Token::tochars[TOKdottd] = "dottd";
|
|
Token::tochars[TOKon_scope_exit] = "scope(exit)";
|
|
}
|