mirror of
https://github.com/xomboverlord/ldc.git
synced 2026-01-11 18:33:14 +01:00
Merged DMD commit f30daa9797cf620ba264c0446f0215eb2464ea7a:
bugzilla 4389 ICE(constfold.c, expression.c), or wrong code: string~=dchar in CTFE
This commit is contained in:
@@ -25,6 +25,7 @@
|
||||
#include "expression.h"
|
||||
#include "aggregate.h"
|
||||
#include "declaration.h"
|
||||
#include "utf.h"
|
||||
|
||||
#ifdef IN_GCC
|
||||
#include "d-gcc-real.h"
|
||||
@@ -1329,10 +1330,12 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
|
||||
|
||||
if (e1->op == TOKnull && (e2->op == TOKint64 || e2->op == TOKstructliteral))
|
||||
{ e = e2;
|
||||
t = t1;
|
||||
goto L2;
|
||||
}
|
||||
else if ((e1->op == TOKint64 || e1->op == TOKstructliteral) && e2->op == TOKnull)
|
||||
{ e = e1;
|
||||
t = t2;
|
||||
L2:
|
||||
Type *tn = e->type->toBasetype();
|
||||
if (tn->ty == Tchar || tn->ty == Twchar || tn->ty == Tdchar)
|
||||
@@ -1340,12 +1343,15 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
|
||||
// Create a StringExp
|
||||
void *s;
|
||||
StringExp *es;
|
||||
size_t len = 1;
|
||||
int sz = tn->size();
|
||||
if (t->nextOf())
|
||||
t = t->nextOf()->toBasetype();
|
||||
int sz = t->size();
|
||||
|
||||
dinteger_t v = e->toInteger();
|
||||
|
||||
size_t len = utf_codeLength(sz, v);
|
||||
s = mem.malloc((len + 1) * sz);
|
||||
memcpy((unsigned char *)s, &v, sz);
|
||||
utf_encode(sz, s, v);
|
||||
|
||||
// Add terminating 0
|
||||
memset((unsigned char *)s + len * sz, 0, sz);
|
||||
@@ -1407,13 +1413,13 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
|
||||
StringExp *es1 = (StringExp *)e1;
|
||||
StringExp *es;
|
||||
Type *t;
|
||||
size_t len = es1->len + 1;
|
||||
int sz = es1->sz;
|
||||
dinteger_t v = e2->toInteger();
|
||||
|
||||
size_t len = es1->len + utf_codeLength(sz, v);
|
||||
s = mem.malloc((len + 1) * sz);
|
||||
memcpy(s, es1->string, es1->len * sz);
|
||||
memcpy((unsigned char *)s + es1->len * sz, &v, sz);
|
||||
utf_encode(sz, (unsigned char *)s + (sz * es1->len), v);
|
||||
|
||||
// Add terminating 0
|
||||
memset((unsigned char *)s + len * sz, 0, sz);
|
||||
@@ -1463,7 +1469,7 @@ Expression *Cat(Type *type, Expression *e1, Expression *e2)
|
||||
|
||||
if (type->toBasetype()->ty == Tsarray)
|
||||
{
|
||||
e->type = new TypeSArray(t1->next, new IntegerExp(loc, es1->elements->dim, Type::tindex));
|
||||
e->type = new TypeSArray(t1->nextOf(), new IntegerExp(loc, es1->elements->dim, Type::tindex));
|
||||
e->type = e->type->semantic(loc, NULL);
|
||||
}
|
||||
else
|
||||
|
||||
125
dmd/utf.c
125
dmd/utf.c
@@ -11,6 +11,7 @@
|
||||
// http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "utf.h"
|
||||
@@ -21,6 +22,40 @@ int utf_isValidDchar(dchar_t c)
|
||||
(c > 0xDFFF && c <= 0x10FFFF && c != 0xFFFE && c != 0xFFFF);
|
||||
}
|
||||
|
||||
/* Lookup table used by stride(): entry b is the value reported for a
 * sequence whose first byte is b.  0xFF marks a byte that is not the
 * start of a UTF-8 sequence.
 */
static const unsigned char UTF8stride[256] =
{
    /* 0x00 - 0x7F: length 1 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

    /* 0x80 - 0xBF: not the start of a sequence */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,

    /* 0xC0 - 0xDF: length 2 */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

    /* 0xE0 - 0xEF: length 3 */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

    /* 0xF0 - 0xF7: length 4 */
    4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xF8 - 0xFB: length 5 */
    5, 5, 5, 5,
    /* 0xFC - 0xFD: length 6 */
    6, 6,
    /* 0xFE - 0xFF: not the start of a sequence */
    0xFF, 0xFF,
};
|
||||
|
||||
/**
|
||||
* stride() returns the length of a UTF-8 sequence starting at index i
|
||||
* in string s.
|
||||
* Returns:
|
||||
* The number of bytes in the UTF-8 sequence or
|
||||
* 0xFF meaning s[i] is not the start of of UTF-8 sequence.
|
||||
*/
|
||||
|
||||
unsigned stride(unsigned char* s, size_t i)
|
||||
{
|
||||
unsigned result = UTF8stride[s[i]];
|
||||
return result;
|
||||
}
|
||||
|
||||
/********************************************
|
||||
* Decode a single UTF-8 character sequence.
|
||||
* Returns:
|
||||
@@ -193,3 +228,93 @@ const char *utf_decodeWchar(unsigned short *s, size_t len, size_t *pidx, dchar_t
|
||||
return msg;
|
||||
}
|
||||
|
||||
/**
 * Encode c as a UTF-8 sequence stored at s.
 * Params:
 *      s = destination; receives 1 to 4 bytes depending on the value of c.
 *          No terminator is written and no length is checked, so the
 *          caller must provide enough room.
 *      c = the value to encode; must not exceed 0x10FFFF (asserted).
 */
void utf_encodeChar(unsigned char *s, dchar_t c)
{
    /* The bytes are cast to unsigned char, the element type of s, so that
     * values above 0x7F never pass through a possibly signed char.
     */
    if (c <= 0x7F)
    {
        s[0] = (unsigned char) c;
    }
    else if (c <= 0x7FF)
    {
        s[0] = (unsigned char)(0xC0 | (c >> 6));
        s[1] = (unsigned char)(0x80 | (c & 0x3F));
    }
    else if (c <= 0xFFFF)
    {
        s[0] = (unsigned char)(0xE0 | (c >> 12));
        s[1] = (unsigned char)(0x80 | ((c >> 6) & 0x3F));
        s[2] = (unsigned char)(0x80 | (c & 0x3F));
    }
    else if (c <= 0x10FFFF)
    {
        s[0] = (unsigned char)(0xF0 | (c >> 18));
        s[1] = (unsigned char)(0x80 | ((c >> 12) & 0x3F));
        s[2] = (unsigned char)(0x80 | ((c >> 6) & 0x3F));
        s[3] = (unsigned char)(0x80 | (c & 0x3F));
    }
    else
    {
        /* Nothing is written for values beyond 0x10FFFF. */
        assert(0);
    }
}
|
||||
|
||||
void utf_encodeWchar(unsigned short *s, dchar_t c)
|
||||
{
|
||||
if (c <= 0xFFFF)
|
||||
{
|
||||
s[0] = (wchar_t) c;
|
||||
}
|
||||
else
|
||||
{
|
||||
s[0] = (wchar_t) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
|
||||
s[1] = (wchar_t) (((c - 0x10000) & 0x3FF) + 0xDC00);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the code length of c in the encoding.
|
||||
* The length is returned in code units of the encoding, not in bytes.
|
||||
*/
|
||||
|
||||
/* UTF-8 variant: number of bytes utf_encodeChar() writes for c.
 * c must not exceed 0x10FFFF (asserted); when assertions are compiled
 * out, 6 is returned for such values.
 */
int utf_codeLengthChar(dchar_t c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;

    /* Same failure style as utf_encodeChar(). */
    assert(0);
    return 6;
}
|
||||
|
||||
/* UTF-16 variant: number of units utf_encodeWchar() writes for c. */
int utf_codeLengthWchar(dchar_t c)
{
    /* Values above 0xFFFF are written as a two-unit surrogate pair. */
    if (c > 0xFFFF)
        return 2;
    return 1;
}
|
||||
|
||||
/**
|
||||
* Returns the code length of c in the encoding.
|
||||
* sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
|
||||
* The code is returned in character count, not in bytes.
|
||||
*/
|
||||
int utf_codeLength(int sz, dchar_t c)
|
||||
{
|
||||
if (sz == 1)
|
||||
return utf_codeLengthChar(c);
|
||||
if (sz == 2)
|
||||
return utf_codeLengthWchar(c);
|
||||
assert(sz == 4);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void utf_encode(int sz, void *s, dchar_t c)
|
||||
{
|
||||
if (sz == 1)
|
||||
utf_encodeChar((unsigned char *)s, c);
|
||||
else if (sz == 2)
|
||||
utf_encodeWchar((unsigned short *)s, c);
|
||||
else
|
||||
{
|
||||
assert(sz == 4);
|
||||
memcpy((unsigned char *)s, &c, sz);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,4 +23,13 @@ const char *utf_validateString(unsigned char *s, size_t len);
|
||||
|
||||
extern int isUniAlpha(dchar_t);
|
||||
|
||||
void utf_encodeChar(unsigned char *s, dchar_t c);
|
||||
void utf_encodeWchar(unsigned short *s, dchar_t c);
|
||||
|
||||
int utf_codeLengthChar(dchar_t c);
|
||||
int utf_codeLengthWchar(dchar_t c);
|
||||
|
||||
int utf_codeLength(int sz, dchar_t c);
|
||||
void utf_encode(int sz, void *s, dchar_t c);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user