Removed tango from the repository and instead added a runtime dir with the files needed to patch and build tango from svn.

Reworked the LLVMDC specific pragmas.
This commit is contained in:
Tomas Lindquist Olsen
2008-08-01 00:32:06 +02:00
parent 8f14ece3af
commit 07cfb67178
534 changed files with 1502 additions and 258956 deletions

View File

@@ -0,0 +1,86 @@
/*******************************************************************************
copyright: Copyright (c) 2004 Tango group. All rights reserved
license: BSD style: $(LICENSE)
version: Initial release: July 2006
Various low-level console oriented utilities
*******************************************************************************/
module util.console;
private import util.string;
version (Win32)
{
private extern (Windows) int GetStdHandle (int);
private extern (Windows) int WriteFile (int, char*, int, int*, void*);
}
else
version (Posix)
{
private extern (C) ptrdiff_t write (int, void*, size_t);
}
/+
// emit a char[] to the console. Note that Win32 does not handle utf8, but
// then neither does fprintf (stderr). This will handle redirection though.
// May need to remedy the utf8 issue
int console (char[] s)
{
version (Win32)
{
int count;
if (WriteFile (GetStdHandle(0xfffffff5), s.ptr, s.length, &count, null))
return count;
return -1;
}
else
version (Posix)
{
return write (2, s.ptr, s.length);
}
}
// emit an integer to the console
int console (uint i)
{
char[10] tmp = void;
return console (intToUtf8 (tmp, i));
}
+/
struct Console
{
Console opCall (char[] s)
{
version (Win32)
{
int count;
WriteFile (GetStdHandle(0xfffffff5), s.ptr, s.length, &count, null);
}
else
version (Posix)
{
write (2, s.ptr, s.length);
}
return *this;
}
// emit an integer to the console
Console opCall (size_t i)
{
char[25] tmp = void;
return console (intToUtf8 (tmp, i));
}
}
Console console;

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2004-2005 by Digital Mars, www.digitalmars.com
* Written by Walter Bright
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, in both source and binary form, subject to the following
* restrictions:
*
* o The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* o Altered source versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
* o This notice may not be removed or altered from any source
* distribution.
*/
// Simple ASCII char classification functions
module util.ctype;
int isalnum(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_ALP|_DIG) : 0; }
int isalpha(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_ALP) : 0; }
int iscntrl(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_CTL) : 0; }
int isdigit(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_DIG) : 0; }
int islower(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_LC) : 0; }
int ispunct(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_PNC) : 0; }
int isspace(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_SPC) : 0; }
int isupper(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_UC) : 0; }
int isxdigit(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_HEX) : 0; }
int isgraph(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_ALP|_DIG|_PNC) : 0; }
int isprint(dchar c) { return (c <= 0x7F) ? _ctype[c] & (_ALP|_DIG|_PNC|_BLK) : 0; }
int isascii(dchar c) { return c <= 0x7F; }
dchar tolower(dchar c)
out (result)
{
assert(!isupper(result));
}
body
{
return isupper(c) ? c + (cast(dchar)'a' - 'A') : c;
}
dchar toupper(dchar c)
out (result)
{
assert(!islower(result));
}
body
{
return islower(c) ? c - (cast(dchar)'a' - 'A') : c;
}
private:
enum
{
_SPC = 8,
_CTL = 0x20,
_BLK = 0x40,
_HEX = 0x80,
_UC = 1,
_LC = 2,
_PNC = 0x10,
_DIG = 4,
_ALP = _UC|_LC,
}
ubyte _ctype[128] =
[
_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,
_CTL,_CTL|_SPC,_CTL|_SPC,_CTL|_SPC,_CTL|_SPC,_CTL|_SPC,_CTL,_CTL,
_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,
_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,_CTL,
_SPC|_BLK,_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,
_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,
_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,
_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,_DIG|_HEX,
_PNC,_PNC,_PNC,_PNC,_PNC,_PNC,
_PNC,_UC|_HEX,_UC|_HEX,_UC|_HEX,_UC|_HEX,_UC|_HEX,_UC|_HEX,_UC,
_UC,_UC,_UC,_UC,_UC,_UC,_UC,_UC,
_UC,_UC,_UC,_UC,_UC,_UC,_UC,_UC,
_UC,_UC,_UC,_PNC,_PNC,_PNC,_PNC,_PNC,
_PNC,_LC|_HEX,_LC|_HEX,_LC|_HEX,_LC|_HEX,_LC|_HEX,_LC|_HEX,_LC,
_LC,_LC,_LC,_LC,_LC,_LC,_LC,_LC,
_LC,_LC,_LC,_LC,_LC,_LC,_LC,_LC,
_LC,_LC,_LC,_PNC,_PNC,_PNC,_PNC,_CTL
];
unittest
{
assert(isspace(' '));
assert(!isspace('z'));
assert(toupper('a') == 'A');
assert(tolower('Q') == 'q');
assert(!isxdigit('G'));
}

View File

@@ -0,0 +1,50 @@
/*******************************************************************************
copyright: Copyright (c) 2004 Tango group. All rights reserved
license: BSD style: $(LICENSE)
version: Initial release: July 2006
Various char[] utilities
*******************************************************************************/
module util.string;
private import tango.stdc.string;
// convert uint to char[], within the given buffer
// Returns a valid slice of the populated buffer
char[] intToUtf8 (char[] tmp, size_t val)
in {
assert (tmp.length > 20, "atoi buffer should be 20 or more chars wide");
}
body
{
char* p = tmp.ptr + tmp.length;
do {
*--p = (val % 10) + '0';
} while (val /= 10);
return tmp [cast(size_t)(p - tmp.ptr) .. $];
}
// function to compare two strings
int stringCompare (char[] s1, char[] s2)
{
auto len = s1.length;
if (s2.length < len)
len = s2.length;
int result = memcmp(s1.ptr, s2.ptr, len);
if (result == 0)
result = cast(int)s1.length - cast(int)s2.length;
return result;
}

851
runtime/internal/util/utf.d Normal file
View File

@@ -0,0 +1,851 @@
// utf.d
/*
* Copyright (C) 2003-2004 by Digital Mars, www.digitalmars.com
* Written by Walter Bright
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* o The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* o Altered source versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
* o This notice may not be removed or altered from any source
* distribution.
*/
// Description of UTF-8 at:
// http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
// http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
module util.utf;
extern (C) void onUnicodeError( char[] msg, size_t idx );
bool isValidDchar(dchar c)
{
/* Note: FFFE and FFFF are specifically permitted by the
* Unicode standard for application internal use, but are not
* allowed for interchange.
* (thanks to Arcane Jill)
*/
return c < 0xD800 ||
(c > 0xDFFF && c <= 0x10FFFF /*&& c != 0xFFFE && c != 0xFFFF*/);
}
unittest
{
debug(utf) printf("utf.isValidDchar.unittest\n");
assert(isValidDchar(cast(dchar)'a') == true);
assert(isValidDchar(cast(dchar)0x1FFFFF) == false);
}
/* This array gives the length of a UTF-8 sequence indexed by the value
* of the leading byte. An FF represents an illegal starting value of
* a UTF-8 sequence.
* FF is used instead of 0 to avoid having loops hang.
*/
ubyte[256] UTF8stride =
[
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
];
uint stride(char[] s, size_t i)
{
return UTF8stride[s[i]];
}
uint stride(wchar[] s, size_t i)
{ uint u = s[i];
return 1 + (u >= 0xD800 && u <= 0xDBFF);
}
uint stride(dchar[] s, size_t i)
{
return 1;
}
/*******************************************
* Given an index into an array of char's,
* and assuming that index is at the start of a UTF character,
* determine the number of UCS characters up to that index.
*/
size_t toUCSindex(char[] s, size_t i)
{
size_t n;
size_t j;
size_t stride;
for (j = 0; j < i; j += stride)
{
stride = UTF8stride[s[j]];
if (stride == 0xFF)
goto Lerr;
n++;
}
if (j > i)
{
Lerr:
onUnicodeError("invalid UTF-8 sequence", j);
}
return n;
}
size_t toUCSindex(wchar[] s, size_t i)
{
size_t n;
size_t j;
for (j = 0; j < i; )
{ uint u = s[j];
j += 1 + (u >= 0xD800 && u <= 0xDBFF);
n++;
}
if (j > i)
{
Lerr:
onUnicodeError("invalid UTF-16 sequence", j);
}
return n;
}
size_t toUCSindex(dchar[] s, size_t i)
{
return i;
}
/******************************************
* Given a UCS index into an array of characters, return the UTF index.
*/
size_t toUTFindex(char[] s, size_t n)
{
size_t i;
while (n--)
{
uint j = UTF8stride[s[i]];
if (j == 0xFF)
onUnicodeError("invalid UTF-8 sequence", i);
i += j;
}
return i;
}
size_t toUTFindex(wchar[] s, size_t n)
{
size_t i;
while (n--)
{ wchar u = s[i];
i += 1 + (u >= 0xD800 && u <= 0xDBFF);
}
return i;
}
size_t toUTFindex(dchar[] s, size_t n)
{
return n;
}
/* =================== Decode ======================= */
dchar decode(char[] s, inout size_t idx)
in
{
assert(idx >= 0 && idx < s.length);
}
out (result)
{
assert(isValidDchar(result));
}
body
{
size_t len = s.length;
dchar V;
size_t i = idx;
char u = s[i];
if (u & 0x80)
{ uint n;
char u2;
/* The following encodings are valid, except for the 5 and 6 byte
* combinations:
* 0xxxxxxx
* 110xxxxx 10xxxxxx
* 1110xxxx 10xxxxxx 10xxxxxx
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
for (n = 1; ; n++)
{
if (n > 4)
goto Lerr; // only do the first 4 of 6 encodings
if (((u << n) & 0x80) == 0)
{
if (n == 1)
goto Lerr;
break;
}
}
// Pick off (7 - n) significant bits of B from first byte of octet
V = cast(dchar)(u & ((1 << (7 - n)) - 1));
if (i + (n - 1) >= len)
goto Lerr; // off end of string
/* The following combinations are overlong, and illegal:
* 1100000x (10xxxxxx)
* 11100000 100xxxxx (10xxxxxx)
* 11110000 1000xxxx (10xxxxxx 10xxxxxx)
* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
*/
u2 = s[i + 1];
if ((u & 0xFE) == 0xC0 ||
(u == 0xE0 && (u2 & 0xE0) == 0x80) ||
(u == 0xF0 && (u2 & 0xF0) == 0x80) ||
(u == 0xF8 && (u2 & 0xF8) == 0x80) ||
(u == 0xFC && (u2 & 0xFC) == 0x80))
goto Lerr; // overlong combination
for (uint j = 1; j != n; j++)
{
u = s[i + j];
if ((u & 0xC0) != 0x80)
goto Lerr; // trailing bytes are 10xxxxxx
V = (V << 6) | (u & 0x3F);
}
if (!isValidDchar(V))
goto Lerr;
i += n;
}
else
{
V = cast(dchar) u;
i++;
}
idx = i;
return V;
Lerr:
onUnicodeError("invalid UTF-8 sequence", i);
return V; // dummy return
}
unittest
{ size_t i;
dchar c;
debug(utf) printf("utf.decode.unittest\n");
static char[] s1 = "abcd";
i = 0;
c = decode(s1, i);
assert(c == cast(dchar)'a');
assert(i == 1);
c = decode(s1, i);
assert(c == cast(dchar)'b');
assert(i == 2);
static char[] s2 = "\xC2\xA9";
i = 0;
c = decode(s2, i);
assert(c == cast(dchar)'\u00A9');
assert(i == 2);
static char[] s3 = "\xE2\x89\xA0";
i = 0;
c = decode(s3, i);
assert(c == cast(dchar)'\u2260');
assert(i == 3);
static char[][] s4 =
[ "\xE2\x89", // too short
"\xC0\x8A",
"\xE0\x80\x8A",
"\xF0\x80\x80\x8A",
"\xF8\x80\x80\x80\x8A",
"\xFC\x80\x80\x80\x80\x8A",
];
for (int j = 0; j < s4.length; j++)
{
try
{
i = 0;
c = decode(s4[j], i);
assert(0);
}
catch (Object o)
{
i = 23;
}
assert(i == 23);
}
}
/********************************************************/
dchar decode(wchar[] s, inout size_t idx)
in
{
assert(idx >= 0 && idx < s.length);
}
out (result)
{
assert(isValidDchar(result));
}
body
{
char[] msg;
dchar V;
size_t i = idx;
uint u = s[i];
if (u & ~0x7F)
{ if (u >= 0xD800 && u <= 0xDBFF)
{ uint u2;
if (i + 1 == s.length)
{ msg = "surrogate UTF-16 high value past end of string";
goto Lerr;
}
u2 = s[i + 1];
if (u2 < 0xDC00 || u2 > 0xDFFF)
{ msg = "surrogate UTF-16 low value out of range";
goto Lerr;
}
u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
i += 2;
}
else if (u >= 0xDC00 && u <= 0xDFFF)
{ msg = "unpaired surrogate UTF-16 value";
goto Lerr;
}
else if (u == 0xFFFE || u == 0xFFFF)
{ msg = "illegal UTF-16 value";
goto Lerr;
}
else
i++;
}
else
{
i++;
}
idx = i;
return cast(dchar)u;
Lerr:
onUnicodeError(msg, i);
return cast(dchar)u; // dummy return
}
/********************************************************/
dchar decode(dchar[] s, inout size_t idx)
in
{
assert(idx >= 0 && idx < s.length);
}
body
{
size_t i = idx;
dchar c = s[i];
if (!isValidDchar(c))
goto Lerr;
idx = i + 1;
return c;
Lerr:
onUnicodeError("invalid UTF-32 value", i);
return c; // dummy return
}
/* =================== Encode ======================= */
void encode(inout char[] s, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
char[] r = s;
if (c <= 0x7F)
{
r ~= cast(char) c;
}
else
{
char[4] buf;
uint L;
if (c <= 0x7FF)
{
buf[0] = cast(char)(0xC0 | (c >> 6));
buf[1] = cast(char)(0x80 | (c & 0x3F));
L = 2;
}
else if (c <= 0xFFFF)
{
buf[0] = cast(char)(0xE0 | (c >> 12));
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[2] = cast(char)(0x80 | (c & 0x3F));
L = 3;
}
else if (c <= 0x10FFFF)
{
buf[0] = cast(char)(0xF0 | (c >> 18));
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[3] = cast(char)(0x80 | (c & 0x3F));
L = 4;
}
else
{
assert(0);
}
r ~= buf[0 .. L];
}
s = r;
}
unittest
{
debug(utf) printf("utf.encode.unittest\n");
char[] s = "abcd";
encode(s, cast(dchar)'a');
assert(s.length == 5);
assert(s == "abcda");
encode(s, cast(dchar)'\u00A9');
assert(s.length == 7);
assert(s == "abcda\xC2\xA9");
//assert(s == "abcda\u00A9"); // BUG: fix compiler
encode(s, cast(dchar)'\u2260');
assert(s.length == 10);
assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
}
/********************************************************/
void encode(inout wchar[] s, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
wchar[] r = s;
if (c <= 0xFFFF)
{
r ~= cast(wchar) c;
}
else
{
wchar[2] buf;
buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
r ~= buf;
}
s = r;
}
void encode(inout dchar[] s, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
s ~= c;
}
/* =================== Validation ======================= */
void validate(char[] s)
{
size_t len = s.length;
size_t i;
for (i = 0; i < len; )
{
decode(s, i);
}
}
void validate(wchar[] s)
{
size_t len = s.length;
size_t i;
for (i = 0; i < len; )
{
decode(s, i);
}
}
void validate(dchar[] s)
{
size_t len = s.length;
size_t i;
for (i = 0; i < len; )
{
decode(s, i);
}
}
/* =================== Conversion to UTF8 ======================= */
char[] toUTF8(char[4] buf, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
if (c <= 0x7F)
{
buf[0] = cast(char) c;
return buf[0 .. 1];
}
else if (c <= 0x7FF)
{
buf[0] = cast(char)(0xC0 | (c >> 6));
buf[1] = cast(char)(0x80 | (c & 0x3F));
return buf[0 .. 2];
}
else if (c <= 0xFFFF)
{
buf[0] = cast(char)(0xE0 | (c >> 12));
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[2] = cast(char)(0x80 | (c & 0x3F));
return buf[0 .. 3];
}
else if (c <= 0x10FFFF)
{
buf[0] = cast(char)(0xF0 | (c >> 18));
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
buf[3] = cast(char)(0x80 | (c & 0x3F));
return buf[0 .. 4];
}
assert(0);
}
char[] toUTF8(char[] s)
in
{
validate(s);
}
body
{
return s;
}
char[] toUTF8(wchar[] s)
{
char[] r;
size_t i;
size_t slen = s.length;
r.length = slen;
for (i = 0; i < slen; i++)
{ wchar c = s[i];
if (c <= 0x7F)
r[i] = cast(char)c; // fast path for ascii
else
{
r.length = i;
foreach (dchar c; s[i .. slen])
{
encode(r, c);
}
break;
}
}
return r;
}
char[] toUTF8(dchar[] s)
{
char[] r;
size_t i;
size_t slen = s.length;
r.length = slen;
for (i = 0; i < slen; i++)
{ dchar c = s[i];
if (c <= 0x7F)
r[i] = cast(char)c; // fast path for ascii
else
{
r.length = i;
foreach (dchar d; s[i .. slen])
{
encode(r, d);
}
break;
}
}
return r;
}
/* =================== Conversion to UTF16 ======================= */
wchar[] toUTF16(wchar[2] buf, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
if (c <= 0xFFFF)
{
buf[0] = cast(wchar) c;
return buf[0 .. 1];
}
else
{
buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
return buf[0 .. 2];
}
}
wchar[] toUTF16(char[] s)
{
wchar[] r;
size_t slen = s.length;
r.length = slen;
r.length = 0;
for (size_t i = 0; i < slen; )
{
dchar c = s[i];
if (c <= 0x7F)
{
i++;
r ~= cast(wchar)c;
}
else
{
c = decode(s, i);
encode(r, c);
}
}
return r;
}
wchar* toUTF16z(char[] s)
{
wchar[] r;
size_t slen = s.length;
r.length = slen + 1;
r.length = 0;
for (size_t i = 0; i < slen; )
{
dchar c = s[i];
if (c <= 0x7F)
{
i++;
r ~= cast(wchar)c;
}
else
{
c = decode(s, i);
encode(r, c);
}
}
r ~= "\000";
return r.ptr;
}
wchar[] toUTF16(wchar[] s)
in
{
validate(s);
}
body
{
return s;
}
wchar[] toUTF16(dchar[] s)
{
wchar[] r;
size_t slen = s.length;
r.length = slen;
r.length = 0;
for (size_t i = 0; i < slen; i++)
{
encode(r, s[i]);
}
return r;
}
/* =================== Conversion to UTF32 ======================= */
dchar[] toUTF32(char[] s)
{
dchar[] r;
size_t slen = s.length;
size_t j = 0;
r.length = slen; // r[] will never be longer than s[]
for (size_t i = 0; i < slen; )
{
dchar c = s[i];
if (c >= 0x80)
c = decode(s, i);
else
i++; // c is ascii, no need for decode
r[j++] = c;
}
return r[0 .. j];
}
dchar[] toUTF32(wchar[] s)
{
dchar[] r;
size_t slen = s.length;
size_t j = 0;
r.length = slen; // r[] will never be longer than s[]
for (size_t i = 0; i < slen; )
{
dchar c = s[i];
if (c >= 0x80)
c = decode(s, i);
else
i++; // c is ascii, no need for decode
r[j++] = c;
}
return r[0 .. j];
}
dchar[] toUTF32(dchar[] s)
in
{
validate(s);
}
body
{
return s;
}
/* ================================ tests ================================== */
unittest
{
debug(utf) printf("utf.toUTF.unittest\n");
char[] c;
wchar[] w;
dchar[] d;
c = "hello";
w = toUTF16(c);
assert(w == "hello");
d = toUTF32(c);
assert(d == "hello");
c = toUTF8(w);
assert(c == "hello");
d = toUTF32(w);
assert(d == "hello");
c = toUTF8(d);
assert(c == "hello");
w = toUTF16(d);
assert(w == "hello");
c = "hel\u1234o";
w = toUTF16(c);
assert(w == "hel\u1234o");
d = toUTF32(c);
assert(d == "hel\u1234o");
c = toUTF8(w);
assert(c == "hel\u1234o");
d = toUTF32(w);
assert(d == "hel\u1234o");
c = toUTF8(d);
assert(c == "hel\u1234o");
w = toUTF16(d);
assert(w == "hel\u1234o");
c = "he\U0010AAAAllo";
w = toUTF16(c);
//foreach (wchar c; w) printf("c = x%x\n", c);
//foreach (wchar c; cast(wchar[])"he\U0010AAAAllo") printf("c = x%x\n", c);
assert(w == "he\U0010AAAAllo");
d = toUTF32(c);
assert(d == "he\U0010AAAAllo");
c = toUTF8(w);
assert(c == "he\U0010AAAAllo");
d = toUTF32(w);
assert(d == "he\U0010AAAAllo");
c = toUTF8(d);
assert(c == "he\U0010AAAAllo");
w = toUTF16(d);
assert(w == "he\U0010AAAAllo");
}